Transition matrix
The model schematic illustrates treatment progression and the possible transitions between the states and the model structure. The first represents a semi-competing risks model, and the second represent a competing risk structure.
library(igraph)
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
## 3 ESTADOS SIMPLES ##
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
links<-data.frame(stringsAsFactors=FALSE,
from = c(rep("Admitted to\nTreatment", 2),"Therapeutic\nDischarge"),
to = c(rep(c("Therapeutic\nDischarge"),1), rep("Readmission", 2)),
value = c("1","2","3"))
links2<-data.frame(stringsAsFactors=FALSE,
from = c(rep("Admitted to\nTreatment", 3),"Therapeutic\nDischarge","Discharge w/o\nClinical Advice"),
to = c(rep(c("Therapeutic\nDischarge","Discharge w/o\nClinical Advice"),1), rep("Readmission", 3)),
value = c("1","2","3","4","5"))
#dev.off()
#https://www.r-graph-gallery.com/248-igraph-plotting-parameters.html
#https://rstudio-pubs-static.s3.amazonaws.com/341807_7b9d1a4e787146d492115f90ad75cd2d.html
par(mfrow=c(1, 2))
#for (i in c(2560:2660)){
set.seed(2630) #i #2660 #2646 #2642 #2630 #2650
co <- layout.fruchterman.reingold(graph_from_data_frame(links, directed=TRUE))
plot(graph_from_data_frame(links, directed=TRUE),
asp = 0,
layout= co,
edge.label=links$value,
edge.label.cex=3,
edge.label.color="black",
#vertex.label= rev(),
vertex.color="white",
vertex.size=120,
vertex.size2=25,
vertex.label.cex=1,
edge.arrow.size=1,
edge.color="black",
vertex.shape="rectangle",
vertex.label.color="black",
edge.curved=0,
edge.width=1.5,
#main=paste0(i),
outputorder="edgesfirst",
vertex.label.dist=0,
vertex.cex = 3)
#}
title("a) Three-states Model (Simplest)", sub = "No recurring states; Absorving state: Readmission;\nOther causes of discharge were not events of interest;\nModified version of an illness-death model") ## internal titles
set.seed(10990)#i #10990 10921 10898 10835
co2 <- layout.fruchterman.reingold(graph_from_data_frame(links2, directed=TRUE))
plot(graph_from_data_frame(links2, directed=TRUE),
asp = 0,
layout= co2,
edge.label=links2$value,
edge.label.cex=2.5,
edge.label.color="black",
#vertex.label= rev(),
vertex.color="white",
vertex.size=120,
vertex.size2=25,
vertex.label.cex=1,
edge.arrow.size=1,
edge.color="black",
vertex.shape="rectangle",
vertex.label.color="black",
edge.curved=0,
edge.width=1.5,
outputorder="edgesfirst",
#main=paste0(i),
vertex.label.dist=0,
vertex.cex = 3)
title("b) Four-states Model", sub = "No recurring states; Absorving state: Readmission;\nOther causes of discharge were not events of interest") ## internal titles

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
## Probando con paquetes estadísticos
if(no_mostrar==1){
library(igraph)
Nodes <- c("Admitted to\nGP","Admitted to\nWE","Therapeutic\nDischarge","Discharge w/o\nClinical Advice","Readmission") #states possible in MSM
Edges <- list("Admitted to\nGP"=list(edges=c("Therapeutic\nDischarge","Discharge w/o\nClinical Advice")),
"Admitted to\nWE"=list(edges=c("Therapeutic\nDischarge","Discharge w/o\nClinical Advice")),
"Therapeutic\nDischarge"=list(edges=c("Readmission")),
"Discharge /wo\nClinical Advice"=list(edges=c("Readmission")),
"Readmission"=list(edges=NULL)) #transitions from each state
RCLTtree <- new("graphNEL",nodes=Nodes,edgeL=Edges,edgemode="directed")
plot(RCLTtree)
#https://www.rdocumentation.org/packages/msSurv/versions/1.2-2/topics/msSurv
msSurv(LTRCdata, RCLTtree, cens.type="ind", LT=FALSE, bs=FALSE, B=200)
}
Any observation where an event occurs which is not the event of interest for a specific transition is treated as a censored at the end of the study (2019, November 13) observation (only referrals and also had inexact dates of discharge), that is, in the same way as a patient that was lost to follow-up. If there is a readmission posterior to loss-to follow-up cases, these cases we obtained the length in days between being readmitted posterior and the time of admission, knowing that any intermediate date of discharge was interval-censored (the exact time in which users discharged is unknown, and its treatment outcomes are unknown, so there is no exact time-to-readmission). Covariates are fixed at baseline. Some transitions were shown to be simultaneous (n= 132). Small adjustment such that transitions were sequential rather than simultaneous by adding one day to the absorbing event, and subtracting a day if the transition corresponded to an intermediate state.
The stacked format of the data allows to calculate hazards very simply by each transition defined earlier (trans).
#Data should be in a data frame with column names "id", "stop", "st.stage", and "stage" where "id" is the individual's identification number, "stop" is the transition time from state j to j', "st.stage" is the state the individual is transitioning from (i.e., j), and "stage" is the state the individual is transitioning to (i.e., j') and equals 0 if right censored.
## 80% of sample is LT, rest has start time of 0
### AGS: Parten en 0, salvo que estén truncados a la izquierda.
### Parece que todos comparten un mismo tiempo ojetivo.
### AGS: Cuando hay un estado seguido no es necesario interval censoring, se dn en tun tiempo continuo
### El 0 es censura
library(mstate)
#### MSPREP= 3 STATES
#_#_#_#_#_#_#_
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_msprep<-
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados %>%
# 2021-03-24: Eliminate cases with readmissions posterior to interval-censored discharges
#dplyr::mutate(cens=ifelse(motivodeegreso_mod_imp=="Referral to another treatment"& !is.na(fech_ing_next_treat),1,0)) %>% dplyr::filter(cens==0) %>%
dplyr::mutate(ther_disch=ifelse(motivodeegreso_mod_imp=="Therapeutic discharge",1,0)) %>%
# para problema para intervalos 0 entre un tratamiento nuevo y otro y entre el ingreso a un tratamiento y su término y que comento más abajo
dplyr::mutate(cambio_fecha_ing= dplyr::case_when(dias_treat_imp_sin_na==0~1 ,T~0)) %>%
dplyr::mutate(fech_ing_num= dplyr::case_when(cambio_fecha_ing==1~fech_ing_num-1,T~fech_ing_num)) %>%
dplyr::mutate(dias_treat_imp_sin_na= dplyr::case_when(cambio_fecha_ing==1~fech_egres_num-fech_ing_num, T~dias_treat_imp_sin_na)) %>%
dplyr::select(-cambio_fecha_ing) %>%
#If status=1, the corresponding transition has been observed. Si no no se ha observado
# para efectos de este modelo simple, experimentar el alta terapéutica es el objetivo, por lo que el resto será censura
dplyr::mutate(readmission=ifelse(!is.na(fech_ing_next_treat),1,0)) %>%
#time of arrival at the state
dplyr::mutate(diff_bet_treat=fech_ing_next_treat-fech_egres_num) %>%
# para problema para intervalos 0 entre un tratamiento nuevo y otro y entre el ingreso a un tratamiento y su término y que comento más abajo. Le añado un día
dplyr::mutate(cambio_fecha_ing_nuevo_t= dplyr::case_when(diff_bet_treat==0~1 ,T~0)) %>%
dplyr::mutate(fech_ing_next_treat= dplyr::case_when(cambio_fecha_ing_nuevo_t==1~fech_ing_next_treat+1,T~fech_ing_next_treat)) %>%
dplyr::mutate(diff_bet_treat= dplyr::case_when(cambio_fecha_ing_nuevo_t==1~fech_ing_next_treat-fech_egres_num, T~diff_bet_treat)) %>%
dplyr::select(-cambio_fecha_ing_nuevo_t) %>%
#dplyr::filter(diff_bet_treat_corr!=diff_bet_treat)
#ADD DATES TO THE FINAL FOLLOW UP OF THE STUDY IF CENSORED
dplyr::mutate(dias_treat_imp_sin_na= dplyr::case_when(ther_disch==0~ as.numeric(as.Date("2019-11-13"))-fech_ing_num,T~dias_treat_imp_sin_na)) %>%
## 2021-03-24, I had to reespecify times to objective times, in order to avoid further problems
dplyr::mutate(date_ther_disch= dias_treat_imp_sin_na, #fech_ing_num+
## 2021-03-24, The posterior treatment has to include the days in the previous treatment
date_post_treat= dplyr::case_when(ther_disch==1 & readmission==1~ dias_treat_imp_sin_na+ diff_bet_treat,
#if the first time is censored,
ther_disch==0 & readmission==1~ fech_ing_next_treat-fech_ing_num,
readmission==0~ as.numeric(as.Date("2019-11-13"))-fech_ing_num)) %>%
## EL RESTO DE LOS PACIENTES NO VAN A HABER REGISTRADO EVENTOS EN ESE TIEMPO, POR LO QUE LLEGARÁN AL FINAL DEL SEGUIMIENTO
dplyr::select(row, fech_ing_num, date_ther_disch, ther_disch, date_post_treat, readmission, tipo_de_programa_2:numero_de_hijos_mod_rec,tipo_de_plan_res) %>%
as.data.frame()
#### MSPREP= 4 STATES
#_#_#_#_#_#_#_
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_msprep2<-
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados %>%
# 2021-03-24: Eliminate cases with readmissions posterior to interval-censored discharges
#dplyr::mutate(cens=ifelse(motivodeegreso_mod_imp=="Referral to another treatment"& !is.na(fech_ing_next_treat),1,0)) %>% dplyr::filter(cens==0) %>%
dplyr::mutate(ther_disch=ifelse(motivodeegreso_mod_imp=="Therapeutic discharge",1,0)) %>%
# For problems with 0 intervals between a new treatment and other between the admission and discharge, I subtract 1 day of admission. Then, and if there is a change in the date, we replace it. If not, we maintain the original value
dplyr::mutate(cambio_fecha_ing= dplyr::case_when(dias_treat_imp_sin_na==0~1 ,T~0)) %>%
dplyr::mutate(fech_ing_num= dplyr::case_when(cambio_fecha_ing==1~fech_ing_num-1,T~fech_ing_num)) %>%
dplyr::mutate(dias_treat_imp_sin_na= dplyr::case_when(cambio_fecha_ing==1~fech_egres_num-fech_ing_num, T~dias_treat_imp_sin_na)) %>%
dplyr::select(-cambio_fecha_ing) %>%
#If status=1, the corresponding transition has been observed. If not, it assumes not observed
dplyr::mutate(disch_wo_clin_adv=ifelse(motivodeegreso_mod_imp %in% c("Early Drop-out","Late Drop-out","Administrative discharge"),1,0)) %>%
dplyr::mutate(readmission=ifelse(!is.na(fech_ing_next_treat),1,0)) %>%
#time of arrival at the state of readmission
dplyr::mutate(diff_bet_treat=fech_ing_next_treat-fech_egres_num) %>%
# For problems with 0 intervals between a new treatment and other between the admission and discharge, I add 1 day of admission to the next treatment. Then, and if there is a change in the date, we replace it. If not, we maintain the original value
dplyr::mutate(cambio_fecha_ing_nuevo_t= dplyr::case_when(diff_bet_treat==0~1 ,T~0)) %>%
dplyr::mutate(fech_ing_next_treat= dplyr::case_when(cambio_fecha_ing_nuevo_t==1~fech_ing_next_treat+1,T~fech_ing_next_treat)) %>%
dplyr::mutate(diff_bet_treat= dplyr::case_when(cambio_fecha_ing_nuevo_t==1~fech_ing_next_treat-fech_egres_num, T~diff_bet_treat)) %>%
dplyr::select(-cambio_fecha_ing_nuevo_t) %>%
#dplyr::filter(diff_bet_treat_corr!=diff_bet_treat)
#ADD DATES TO THE FINAL FOLLOW UP OF THE STUDY IF CENSORED
dplyr::mutate(days_to_ther_disch= dplyr::case_when(ther_disch==0~ as.numeric(as.Date("2019-11-13"))-fech_ing_num,T~dias_treat_imp_sin_na)) %>%
dplyr::mutate(days_to_disch_wo_clin_adv= dplyr::case_when(disch_wo_clin_adv==0~ as.numeric(as.Date("2019-11-13"))-fech_ing_num,T~dias_treat_imp_sin_na)) %>%
## 2021-03-24, I had to reespecify times to objective times, in order to avoid further problems
dplyr::mutate(date_ther_disch= days_to_ther_disch, #fech_ing_num+
date_disch_wo_clin_adv= days_to_disch_wo_clin_adv,
## 2021-03-24, The posterior treatment has to include the days in the previous treatment
date_post_treat= dplyr::case_when(ther_disch==1 & readmission==1~ days_to_ther_disch+ diff_bet_treat,
disch_wo_clin_adv==1 & readmission==1~ days_to_disch_wo_clin_adv+ diff_bet_treat,
#if the first time is censored,
disch_wo_clin_adv==0 & ther_disch==0 & readmission==1~ fech_ing_next_treat-fech_ing_num,
ther_disch==1 & disch_wo_clin_adv==0 & readmission==1~ fech_ing_next_treat-fech_ing_num,
readmission==0~ as.numeric(as.Date("2019-11-13"))-fech_ing_num)) %>%
## EL RESTO DE LOS PACIENTES NO VAN A HABER REGISTRADO EVENTOS EN ESE TIEMPO, POR LO QUE LLEGARÁN AL FINAL DEL SEGUIMIENTO
dplyr::select(row, fech_ing_num, date_ther_disch, ther_disch, date_disch_wo_clin_adv, disch_wo_clin_adv, date_post_treat, readmission, tipo_de_programa_2:numero_de_hijos_mod_rec,tipo_de_plan_res) %>%
as.data.frame()
tail(CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_msprep) %>%
knitr::kable(format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
caption="Table 8a. Data in Wide, 3-states",
align= c("c",rep('c', 5)))%>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size= 11)%>%
kableExtra::add_footnote("Note= Proportions from the initial state") %>%
kableExtra::scroll_box(width = "100%", height = "350px")
Table 8a. Data in Wide, 3-states
|
|
row
|
fech_ing_num
|
date_ther_disch
|
ther_disch
|
date_post_treat
|
readmission
|
tipo_de_programa_2
|
estado_conyugal_2
|
edad_al_ing_grupos
|
escolaridad_rec
|
sus_principal_mod
|
freq_cons_sus_prin
|
compromiso_biopsicosocial
|
tenencia_de_la_vivienda_mod
|
num_otras_sus_mod
|
numero_de_hijos_mod_rec
|
tipo_de_plan_res
|
|
21373
|
18,172
|
15,187
|
3,026
|
0
|
1,761
|
1
|
General population
|
Single
|
30-39
|
3-Completed primary school or less
|
Cocaine paste
|
Daily
|
2-Moderate
|
Stays temporarily with a relative
|
One additional substance
|
Yes
|
Outpatient
|
|
21374
|
41,467
|
15,873
|
2,340
|
0
|
275
|
1
|
Women specific
|
Married/Shared living arrangements
|
18-29
|
3-Completed primary school or less
|
Cocaine paste
|
Daily
|
2-Moderate
|
Illegal Settlement
|
More than one additional substance
|
Yes
|
Residential
|
|
21375
|
16,343
|
15,114
|
3,099
|
0
|
2,244
|
1
|
Women specific
|
Single
|
18-29
|
2-Completed high school or less
|
Alcohol
|
2 to 3 days a week
|
3-Severe
|
Renting
|
More than one additional substance
|
Yes
|
Residential
|
|
21376
|
139,357
|
17,688
|
525
|
0
|
525
|
0
|
Women specific
|
Single
|
18-29
|
2-Completed high school or less
|
Cocaine paste
|
Daily
|
3-Severe
|
Stays temporarily with a relative
|
More than one additional substance
|
No
|
Residential
|
|
21377
|
24,900
|
15,345
|
349
|
1
|
2,868
|
0
|
Women specific
|
Single
|
18-29
|
1-More than high school
|
Alcohol
|
Daily
|
2-Moderate
|
Owner/Transferred dwellings/Pays Dividends
|
One additional substance
|
Yes
|
Outpatient
|
|
21378
|
41,118
|
15,874
|
2,339
|
0
|
2,339
|
0
|
Women specific
|
Married/Shared living arrangements
|
30-39
|
2-Completed high school or less
|
Cocaine paste
|
Daily
|
3-Severe
|
Stays temporarily with a relative
|
No additional substance
|
Yes
|
Residential
|
|
a Note= Proportions from the initial state
|
tail(CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_msprep2) %>%
knitr::kable(format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
caption="Table 8b. Data in Wide, 4-states",
align= c("c",rep('c', 5)))%>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size= 11)%>%
kableExtra::add_footnote("Note= Proportions from the initial state") %>%
kableExtra::scroll_box(width = "100%", height = "350px")
Table 8b. Data in Wide, 4-states
|
|
row
|
fech_ing_num
|
date_ther_disch
|
ther_disch
|
date_disch_wo_clin_adv
|
disch_wo_clin_adv
|
date_post_treat
|
readmission
|
tipo_de_programa_2
|
estado_conyugal_2
|
edad_al_ing_grupos
|
escolaridad_rec
|
sus_principal_mod
|
freq_cons_sus_prin
|
compromiso_biopsicosocial
|
tenencia_de_la_vivienda_mod
|
num_otras_sus_mod
|
numero_de_hijos_mod_rec
|
tipo_de_plan_res
|
|
21373
|
18,172
|
15,187
|
3,026
|
0
|
304
|
1
|
1,761
|
1
|
General population
|
Single
|
30-39
|
3-Completed primary school or less
|
Cocaine paste
|
Daily
|
2-Moderate
|
Stays temporarily with a relative
|
One additional substance
|
Yes
|
Outpatient
|
|
21374
|
41,467
|
15,873
|
2,340
|
0
|
2,340
|
0
|
275
|
1
|
Women specific
|
Married/Shared living arrangements
|
18-29
|
3-Completed primary school or less
|
Cocaine paste
|
Daily
|
2-Moderate
|
Illegal Settlement
|
More than one additional substance
|
Yes
|
Residential
|
|
21375
|
16,343
|
15,114
|
3,099
|
0
|
115
|
1
|
2,244
|
1
|
Women specific
|
Single
|
18-29
|
2-Completed high school or less
|
Alcohol
|
2 to 3 days a week
|
3-Severe
|
Renting
|
More than one additional substance
|
Yes
|
Residential
|
|
21376
|
139,357
|
17,688
|
525
|
0
|
56
|
1
|
525
|
0
|
Women specific
|
Single
|
18-29
|
2-Completed high school or less
|
Cocaine paste
|
Daily
|
3-Severe
|
Stays temporarily with a relative
|
More than one additional substance
|
No
|
Residential
|
|
21377
|
24,900
|
15,345
|
349
|
1
|
2,868
|
0
|
2,868
|
0
|
Women specific
|
Single
|
18-29
|
1-More than high school
|
Alcohol
|
Daily
|
2-Moderate
|
Owner/Transferred dwellings/Pays Dividends
|
One additional substance
|
Yes
|
Outpatient
|
|
21378
|
41,118
|
15,874
|
2,339
|
0
|
38
|
1
|
2,339
|
0
|
Women specific
|
Married/Shared living arrangements
|
30-39
|
2-Completed high school or less
|
Cocaine paste
|
Daily
|
3-Severe
|
Stays temporarily with a relative
|
No additional substance
|
Yes
|
Residential
|
|
a Note= Proportions from the initial state
|
#For censored state transitions it can be awkward having to replicate the censoring time for each non-visited state
#https://github.com/stulacy/multistateutils
mat_3_states <- trans.illdeath(names=c("Admission","Therapeutic\nDischarge","Readmission"))
#All possible paths through the multi-state model can be found here:
#paths(mat_3_states)
#Los números son determinados por posición en cada columna (o eje Y).
#Si uno quiere definir para la otra fila, salta al siguiente vector:
mat_4_states<- transMat(list(c(2,3,4), 4, 4, c()),
names = c("Admission", "Therapeutic\nDischarge", "Discharge Without\nMedical Advice", "Readmission"))
#illness-death model without recovery
ms_CONS_C1_SEP_2020_women_imputed <- msprep(time = c(NA, "date_ther_disch", "date_post_treat"),
status = c(NA, "ther_disch", "readmission"),
data = CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_msprep,
id = "row",
trans = mat_3_states,
keep = c("tipo_de_programa_2","estado_conyugal_2","edad_al_ing_grupos","escolaridad_rec","sus_principal_mod", "freq_cons_sus_prin","compromiso_biopsicosocial","tenencia_de_la_vivienda_mod","num_otras_sus_mod","numero_de_hijos_mod_rec","tipo_de_plan_res"))
#model of 5 states without recovery.
ms2_CONS_C1_SEP_2020_women_imputed <- msprep(time = c(NA, "date_ther_disch",
"date_disch_wo_clin_adv", "date_post_treat"),
status = c(NA, "ther_disch", "disch_wo_clin_adv", "readmission"),
data = CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_msprep2,
id = "row",
trans = mat_4_states,
keep = c("tipo_de_programa_2","estado_conyugal_2","edad_al_ing_grupos","escolaridad_rec","sus_principal_mod", "freq_cons_sus_prin","compromiso_biopsicosocial","tenencia_de_la_vivienda_mod","num_otras_sus_mod","numero_de_hijos_mod_rec","tipo_de_plan_res"))
#Starting from state 1, simultaneous transitions possible for subjects 36666 36586 56465 136847 37595 60609 51706 76376 117544 140210 at times 126 472 32 36 1 203 45 14 5 71; smallest receiving state chosen
invisible(c("This problem responds to differences between treatments 0. Should be resolved in the initial stages"))
if(no_mostrar==1){
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_msprep2 %>%
dplyr::filter(row %in% unlist(
ms2_CONS_C1_SEP_2020_women_imputed %>%
dplyr::filter(Tstop<=Tstart) %>%
dplyr::select(row,from,to,trans,Tstart,Tstop,time,status) %>%
distinct(row)
)) %>%
#dplyr::mutate(diff_bet_treat=fech_ing_next_treat-fech_egres_num)%>%
View()
}
if(no_mostrar==1){
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados %>%
dplyr::filter(row %in% unlist(
ms2_CONS_C1_SEP_2020_women_imputed %>%
dplyr::filter(Tstop<=Tstart) %>%
dplyr::select(row,from,to,trans,Tstart,Tstop,time,status) %>%
distinct(row)
)) %>%
dplyr::select(row, motivodeegreso_mod_imp, contains("fech"))
}
invisible(c("Investigate warning"))
#37595 Administrative discharge 2013-03-18 2013-03-19 2013-03-19
#
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#SCALE INTERVALS TO YEARS
ms_CONS_C1_SEP_2020_women_imputed[, c("Tstart", "Tstop", "time")] <- ms_CONS_C1_SEP_2020_women_imputed[, c("Tstart", "Tstop", "time")]/365.25
ms2_CONS_C1_SEP_2020_women_imputed[, c("Tstart", "Tstop", "time")] <- ms2_CONS_C1_SEP_2020_women_imputed[, c("Tstart", "Tstop", "time")]/365.25
Then we present the transition with the frequencies and proportions.
data.frame(events(ms_CONS_C1_SEP_2020_women_imputed)$Frequencies) %>%
dplyr::filter(to!="total entering") %>%
left_join(data.frame(events(ms_CONS_C1_SEP_2020_women_imputed)$Proportions), by=c("from", "to")) %>%
dplyr::rename("Frequencies"="Freq.x", "Proportions"="Freq.y") %>%
dplyr::arrange(from, to) %>%
dplyr::mutate(diff=ifelse(as.character(from)!=as.character(to),0,1)) %>%
dplyr::filter(diff==0) %>%
dplyr::select(-diff) %>%
dplyr::mutate(Proportions=scales::percent(Proportions)) %>%
knitr::kable(format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
caption="Table 9a. Empirical State Transition Matrix, 3 States Model",
align= c("c",rep('c', 5)))%>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size= 11)%>%
kableExtra::add_footnote("Note= Proportions from the initial state") %>%
kableExtra::scroll_box(width = "100%", height = "350px")
Table 9a. Empirical State Transition Matrix, 3 States Model
|
from
|
to
|
Frequencies
|
Proportions
|
|
Admission
|
Therapeutic Discharge
|
5,114
|
23.92%
|
|
Admission
|
Readmission
|
4,348
|
20.34%
|
|
Admission
|
no event
|
11,916
|
55.74%
|
|
Therapeutic Discharge
|
Admission
|
0
|
0.00%
|
|
Therapeutic Discharge
|
Readmission
|
1,063
|
20.79%
|
|
Therapeutic Discharge
|
no event
|
4,051
|
79.21%
|
|
Readmission
|
Admission
|
0
|
0.00%
|
|
Readmission
|
Therapeutic Discharge
|
0
|
0.00%
|
|
Readmission
|
no event
|
5,411
|
100.00%
|
|
a Note= Proportions from the initial state
|
data.frame(events(ms2_CONS_C1_SEP_2020_women_imputed)$Frequencies) %>%
dplyr::filter(to!="total entering") %>%
left_join(data.frame(events(ms2_CONS_C1_SEP_2020_women_imputed)$Proportions), by=c("from", "to")) %>%
dplyr::rename("Frequencies"="Freq.x", "Proportions"="Freq.y") %>%
dplyr::arrange(from, to) %>%
dplyr::mutate(diff=ifelse(as.character(from)!=as.character(to),0,1)) %>%
dplyr::filter(diff==0) %>%
dplyr::select(-diff) %>%
dplyr::mutate(Proportions=scales::percent(Proportions)) %>%
knitr::kable(format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
caption="Table 9b. Empirical State Transition Matrix, 4 States Model",
align= c("c",rep('c', 5)))%>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size= 11)%>%
kableExtra::add_footnote("Note= Proportions from the initial state")%>%
kableExtra::scroll_box(width = "100%", height = "350px")
Table 9b. Empirical State Transition Matrix, 4 States Model
|
from
|
to
|
Frequencies
|
Proportions
|
|
Admission
|
Therapeutic Discharge
|
5,114
|
23.9%
|
|
Admission
|
Discharge Without Medical Advice
|
11,995
|
56.1%
|
|
Admission
|
Readmission
|
753
|
3.5%
|
|
Admission
|
no event
|
3,516
|
16.4%
|
|
Therapeutic Discharge
|
Admission
|
0
|
0.0%
|
|
Therapeutic Discharge
|
Discharge Without Medical Advice
|
0
|
0.0%
|
|
Therapeutic Discharge
|
Readmission
|
1,063
|
20.8%
|
|
Therapeutic Discharge
|
no event
|
4,051
|
79.2%
|
|
Discharge Without Medical Advice
|
Admission
|
0
|
0.0%
|
|
Discharge Without Medical Advice
|
Therapeutic Discharge
|
0
|
0.0%
|
|
Discharge Without Medical Advice
|
Readmission
|
3,595
|
30.0%
|
|
Discharge Without Medical Advice
|
no event
|
8,400
|
70.0%
|
|
Readmission
|
Admission
|
0
|
0.0%
|
|
Readmission
|
Therapeutic Discharge
|
0
|
0.0%
|
|
Readmission
|
Discharge Without Medical Advice
|
0
|
0.0%
|
|
Readmission
|
no event
|
5,411
|
100.0%
|
|
a Note= Proportions from the initial state
|
Assessment of Fit of Transitions
We need to derive appropriate functional forms and define respective survival functions. One reason to favor patametric models is that they can be easier to generalize. Seven candidate distributions were considered to model transitions from Adm→Ther.Disch., Adm.→Readm. and Ther.Disch.→Readm., including Weibull (assume a monotonically increasing or decreasing hazard), Log-logistic (non-monotonic hazards), Gompertz (assume a monotonically increasing or decreasing hazard), Log-normal (non-monotonic hazards), Exponential (constant hazard), Gamma & Generalized gamma (more flexible).
The following plots fitted survival curves from each model (coloured lines), with the Kaplan-Meier estimate, in red, obtained from an example of Jackson available here, added to the contributions of Wathers & Cutler available here.
options(warn=-1)
n_iter<-10000
tiempo_antes_fits<-Sys.time()
#Weathers, Brandon and Cutler, Richard Dr., "Comparision of Survival Curves Between Cox Proportional
#Hazards, Random Forests, and Conditional Inference Forests in Survival Analysis" (2017). All Graduate
#Plan B and other Reports. 927.
#https://digitalcommons.usu.edu/gradreports/927
#<div style="border: 1px solid #ddd; padding: 5px; overflow-y: scroll; height:350px; overflow-x: scroll; width:100%">
#https://devinincerti.com/2019/01/01/sim-mstate.html
n_trans <- max(mat_3_states, na.rm = TRUE)
fits_wei <- vector(mode = "list", length = n_trans)
fits_weiph <- vector(mode = "list", length = n_trans)
fits_llogis <- vector(mode = "list", length = n_trans)
fits_gomp <- vector(mode = "list", length = n_trans)
fits_logn <- vector(mode = "list", length = n_trans)
fits_exp <- vector(mode = "list", length = n_trans)
fits_gam <- vector(mode = "list", length = n_trans)
fits_ggam <- vector(mode = "list", length = n_trans)
fits_genf <- vector(mode = "list", length = n_trans)
fits_c_wei <- vector(mode = "list", length = n_trans)
fits_c_weiph <- vector(mode = "list", length = n_trans)
fits_c_llogis <- vector(mode = "list", length = n_trans)
fits_c_gomp <- vector(mode = "list", length = n_trans)
fits_c_logn <- vector(mode = "list", length = n_trans)
fits_c_exp <- vector(mode = "list", length = n_trans)
fits_c_gam <- vector(mode = "list", length = n_trans)
fits_c_ggam <- vector(mode = "list", length = n_trans)
fits_c_genf <- vector(mode = "list", length = n_trans)
km.lc<-list()
fits_cox<-list()
fits_c_cox<-list()
#"gengamma" Generalized gamma (stable parameterisation)
#"gengamma.orig" Generalized gamma (original parameterisation)
#"genf" Generalized F (stable parameterisation)
#"genf.orig" Generalized F (original parameterisation)
#"weibull" Weibull
#"gamma" Gamma
#"exp" Exponential
#"lnorm" Log-normal
#"gompertz" Gompertz
library(flexsurv)
#Specify the formula
fitform <- Surv(time, status) ~ 1
for (i in 1:n_trans){
fits_wei[[i]] <- flexsurvreg(formula=fitform,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "weibull")
}
for (i in 1:n_trans){
fits_weiph[[i]] <- flexsurvreg(formula=fitform,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "weibullph")
}
for (i in 1:n_trans){
fits_llogis[[i]] <- flexsurvreg(formula=fitform,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "llogis")
}
for (i in 1:n_trans){
fits_gam[[i]] <- flexsurvreg(formula=fitform,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "gamma")
}
#LOS ERRORES OCURREN CUANDO NO HAY COVARIABLES
#In (function (q, shape, rate = 1, scale = 1/rate, lower.tail = TRUE, ... : NaNs produced
#gamma no funcionó: NaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs produced
for (i in 1:n_trans){
fits_ggam[[i]] <- flexsurvreg(formula=fitform,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "gengamma")
}
for (i in 1:n_trans){
fits_gomp[[i]] <- flexsurvreg(formula=fitform,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "gompertz")
}
for (i in 1:n_trans){
fits_logn[[i]] <- flexsurvreg(formula=fitform,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "lnorm")
}
for (i in 1:n_trans){
fits_exp[[i]] <- flexsurvreg(formula=fitform,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "exp")
}
for (i in 1:n_trans){
fits_genf[[i]] <- flexsurvreg(formula=fitform,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "genf")
}
#_#_#_#_#_#_#_#_#_#_#_#_#_
#covariates
#Specify the formula
fitform2 <- Surv(time, status) ~ edad_al_ing_grupos+ escolaridad_rec+ sus_principal_mod+
freq_cons_sus_prin+ compromiso_biopsicosocial+ tenencia_de_la_vivienda_mod+
num_otras_sus_mod+ numero_de_hijos_mod_rec+ factor(tipo_de_programa_2)+ tipo_de_plan_res
fitform2_t3 <- Surv(time, status) ~ edad_al_ing_grupos+ escolaridad_rec+ sus_principal_mod+
freq_cons_sus_prin+ compromiso_biopsicosocial+ tenencia_de_la_vivienda_mod+
num_otras_sus_mod+ numero_de_hijos_mod_rec+ factor(tipo_de_programa_2)+ tipo_de_plan_res+ arrival
for (i in 1:(n_trans-1)){
fits_c_wei[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "weibull")
}
for (i in 1:(n_trans-1)){
fits_c_weiph[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "weibullph")
}
for (i in 1:(n_trans-1)){
fits_c_llogis[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "llogis")
}
for (i in 1:(n_trans-1)){
fits_c_gam[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "gamma")
}
#LOS ERRORES OCURREN CUANDO NO HAY COVARIABLES
#In (function (q, shape, rate = 1, scale = 1/rate, lower.tail = TRUE, ... : NaNs produced
#gamma no funcionó: NaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs produced
for (i in 1:(n_trans-1)){
fits_c_ggam[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "gengamma")
}
for (i in 1:(n_trans-1)){
fits_c_gomp[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "gompertz")
}
for (i in 1:(n_trans-1)){
fits_c_logn[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "lnorm")
}
for (i in 1:(n_trans-1)){
fits_c_exp[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "exp")
}
for (i in 1:(n_trans-1)){
fits_c_genf[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "genf")
}
fits_c_wei[[n_trans]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == n_trans),
dist = "weibull")
fits_c_weiph[[n_trans]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == n_trans),
dist = "weibullph")
fits_c_llogis[[n_trans]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == n_trans),
dist = "llogis")
fits_c_gam[[n_trans]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == n_trans),
dist = "gamma")
fits_c_ggam[[n_trans]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == n_trans),
dist = "gengamma")
fits_c_gomp[[n_trans]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == n_trans),
dist = "gompertz")
fits_c_logn[[n_trans]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == n_trans),
dist = "lnorm")
fits_c_exp[[n_trans]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == n_trans),
dist = "exp")
fits_c_genf[[n_trans]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == n_trans),
dist = "genf")
for (i in 1:n_trans){
km.lc[[i]] <- survfit(formula= fitform, data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i))
}
endTime <- Sys.time()
paste0("Time in process: ");endTime - tiempo_antes_fits
## [1] "Time in process: "
## Time difference of 20.54102 mins
transition_label<- c(`1`="Transition 1: Admission to Ther.Discharge",`2`="Transition 2: Admission to Readmission",`3`="Transition 3: Ther.Discharge to Readmission")
startTime <- Sys.time()
layout(matrix(1:3, nc = 1, byrow = F))
for (i in 1:n_trans){
plot(km.lc[[i]], col="red", conf.int=F, xlim=c(0,12));
lines(fits_wei[[i]], col="coral4", ci=F);
lines(fits_weiph[[i]], col="navyblue", ci=F);
lines(fits_gomp[[i]], col="lightpink", ci=F);
lines(fits_llogis[[i]], col="gray25", ci=F);#A0A36D
lines(fits_gam[[i]], col="darkorchid4", ci=F);
lines(fits_ggam[[i]], col="#496A72", ci=F);
lines(fits_logn[[i]], col="gray70", ci=F);
lines(fits_exp[[i]],col="#A0A36D", ci=F);
lines(fits_exp[[i]],col="cadetblue", ci=F)
legend("bottomleft", legend = c("Kaplan-Meier","Weibull (AFT)", "Weibull (PH)", "Gompertz", "Log-logistic", "Gamma",
"Generalized gamma", "Lognormal", "Exponential", "Generalized F"), col =
c("red","coral4","navyblue","lightpink","gray25",#"#A0A36D","#886894",
"darkorchid4","#496A72","gray70","#A0A36D", "cadetblue"),
title = "Distributions", cex = .95, bty = "n", lty=1,lwd=3)# lty = 1:2,
title(main=transition_label[[i]])
}
endTime <- Sys.time()
paste0("Time in process: ");endTime - startTime
## [1] "Time in process: "
## Time difference of 0.615978 secs
#23 min aprox.
#For more complicated models, users should specify what covariate values they want summaries for, rather than relying on the default
#</div>
options(warn=0)
options(warn=-1)
n_iter<-10000
tiempo_antes_fits<-Sys.time()
#Weathers, Brandon and Cutler, Richard Dr., "Comparision of Survival Curves Between Cox Proportional
#Hazards, Random Forests, and Conditional Inference Forests in Survival Analysis" (2017). All Graduate
#Plan B and other Reports. 927.
#https://digitalcommons.usu.edu/gradreports/927
#<div style="border: 1px solid #ddd; padding: 5px; overflow-y: scroll; height:350px; overflow-x: scroll; width:100%">
#https://devinincerti.com/2019/01/01/sim-mstate.html
n_trans2 <- max(mat_4_states, na.rm = TRUE)
fits_wei2 <- vector(mode = "list", length = n_trans2)
fits_weiph2 <- vector(mode = "list", length = n_trans2)
fits_llogis2 <- vector(mode = "list", length = n_trans2)
fits_gomp2 <- vector(mode = "list", length = n_trans2)
fits_logn2 <- vector(mode = "list", length = n_trans2)
fits_exp2 <- vector(mode = "list", length = n_trans2)
fits_gam2 <- vector(mode = "list", length = n_trans2)
fits_ggam2 <- vector(mode = "list", length = n_trans2)
fits_genf2 <- vector(mode = "list", length = n_trans2)
fits_c_wei2 <- vector(mode = "list", length = n_trans2)
fits_c_weiph2 <- vector(mode = "list", length = n_trans2)
fits_c_llogis2 <- vector(mode = "list", length = n_trans2)
fits_c_gomp2 <- vector(mode = "list", length = n_trans2)
fits_c_logn2 <- vector(mode = "list", length = n_trans2)
fits_c_exp2 <- vector(mode = "list", length = n_trans2)
fits_c_gam2 <- vector(mode = "list", length = n_trans2)
fits_c_ggam2 <- vector(mode = "list", length = n_trans2)
fits_c_genf2 <- vector(mode = "list", length = n_trans2)
km.lc2<-list()
fits_cox2<-list()
fits_c_cox2<-list()
#"gengamma" Generalized gamma (stable parameterisation)
#"gengamma.orig" Generalized gamma (original parameterisation)
#"genf" Generalized F (stable parameterisation)
#"genf.orig" Generalized F (original parameterisation)
#"weibull" Weibull
#"gamma" Gamma
#"exp" Exponential
#"lnorm" Log-normal
#"gompertz" Gompertz
library(flexsurv)
#Specify the formula
fitform_4s <- Surv(time, status) ~ 1
for (i in 1:n_trans2){
fits_wei2[[i]] <- flexsurvreg(formula=fitform_4s,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "weibull")
}
for (i in 1:n_trans2){
fits_weiph2[[i]] <- flexsurvreg(formula=fitform_4s,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "weibullph")
}
for (i in 1:n_trans2){
fits_llogis2[[i]] <- flexsurvreg(formula=fitform_4s,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "llogis")
}
for (i in 1:n_trans2){
fits_gam2[[i]] <- flexsurvreg(formula=fitform_4s,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "gamma")
}
#LOS ERRORES OCURREN CUANDO NO HAY COVARIABLES
#In (function (q, shape, rate = 1, scale = 1/rate, lower.tail = TRUE, ... : NaNs produced
#gamma no funcionó: NaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs produced
for (i in 1:n_trans2){
fits_ggam2[[i]] <- flexsurvreg(formula=fitform_4s,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "gengamma")
}
for (i in 1:n_trans2){
fits_gomp2[[i]] <- flexsurvreg(formula=fitform_4s,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "gompertz")
}
for (i in 1:n_trans2){
fits_logn2[[i]] <- flexsurvreg(formula=fitform_4s,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "lnorm")
}
for (i in 1:n_trans2){
fits_exp2[[i]] <- flexsurvreg(formula=fitform_4s,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "exp")
}
for (i in 1:n_trans2){
fits_genf2[[i]] <- flexsurvreg(formula=fitform_4s,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "genf")
}
#_#_#_#_#_#_#_#_#_#_#_#_#_
#covariates
#Specify the formula
fitform2 <- Surv(time, status) ~ edad_al_ing_grupos+ escolaridad_rec+ sus_principal_mod+
freq_cons_sus_prin+ compromiso_biopsicosocial+ tenencia_de_la_vivienda_mod+
num_otras_sus_mod+ numero_de_hijos_mod_rec+ factor(tipo_de_programa_2)+ tipo_de_plan_res
fitform2_t3 <- Surv(time, status) ~ edad_al_ing_grupos+ escolaridad_rec+ sus_principal_mod+
freq_cons_sus_prin+ compromiso_biopsicosocial+ tenencia_de_la_vivienda_mod+
num_otras_sus_mod+ numero_de_hijos_mod_rec+ factor(tipo_de_programa_2)+ tipo_de_plan_res+ arrival
for (i in 1:(n_trans2-2)){
fits_c_wei2[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "weibull")
}
for (i in 1:(n_trans2-2)){
fits_c_weiph2[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "weibullph")
}
for (i in 1:(n_trans2-2)){
fits_c_llogis2[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "llogis")
}
for (i in 1:(n_trans2-2)){
fits_c_gam2[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "gamma")
}
#LOS ERRORES OCURREN CUANDO NO HAY COVARIABLES
#In (function (q, shape, rate = 1, scale = 1/rate, lower.tail = TRUE, ... : NaNs produced
#gamma no funcionó: NaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs produced
for (i in 1:(n_trans2-3)){
fits_c_ggam2[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "gengamma")
}
#https://stackoverflow.com/questions/47100140/estimating-survival-with-a-generalized-gamma-function-in-flexsurv-fails
#optim uses a finite-difference approximation for the gradient as stated in help("optim")
fits_c_ggam2[[3]] <- flexsurvreg(formula=fitform2,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == 3),
dist = "gengamma", inits=c(.0001, mean(subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == 3)[,"time"],na.rm=T)))
#A function of the uncensored survival times t, which returns a vector of reasonable initial values for maximum likelihood estimation of each parameter. For example, function(t){ c(1, mean(t)) } will always initialize the first of two parameters at 1, and the second (a scale parameter, for instance) at the mean of t.
for (i in 1:(n_trans2-2)){
fits_c_gomp2[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "gompertz")
}
for (i in 1:(n_trans2-2)){
fits_c_logn2[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "lnorm")
}
for (i in 1:(n_trans2-2)){
fits_c_exp2[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "exp")
}
for (i in 1:(n_trans2-2)){
fits_c_genf2[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "genf")
}
##4th state
fits_c_wei2[[(n_trans2-1)]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == (n_trans2-1)),
dist = "weibull")
fits_c_weiph2[[(n_trans2-1)]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == (n_trans2-1)),
dist = "weibullph")
fits_c_llogis2[[(n_trans2-1)]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == (n_trans2-1)),
dist = "llogis")
fits_c_gam2[[(n_trans2-1)]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == (n_trans2-1)),
dist = "gamma")
fits_c_ggam2[[(n_trans2-1)]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == (n_trans2-1)),
dist = "gengamma")
fits_c_gomp2[[(n_trans2-1)]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == (n_trans2-1)),
dist = "gompertz")
fits_c_logn2[[(n_trans2-1)]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == (n_trans2-1)),
dist = "lnorm")
fits_c_exp2[[(n_trans2-1)]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == (n_trans2-1)),
dist = "exp")
fits_c_genf2[[(n_trans2-1)]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == (n_trans2-1)),
dist = "genf")
#5th state
fits_c_wei2[[n_trans2]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == (n_trans2-0)),
dist = "weibull")
fits_c_weiph2[[n_trans2]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == (n_trans2-0)),
dist = "weibullph")
fits_c_llogis2[[n_trans2]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == (n_trans2-0)),
dist = "llogis")
fits_c_gam2[[n_trans2]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == (n_trans2-0)),
dist = "gamma")
fits_c_ggam2[[n_trans2]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == (n_trans2-0)),
dist = "gengamma")
fits_c_gomp2[[n_trans2]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == (n_trans2-0)),
dist = "gompertz")
fits_c_logn2[[n_trans2]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == (n_trans2-0)),
dist = "lnorm")
fits_c_exp2[[n_trans2]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == (n_trans2-0)),
dist = "exp")
fits_c_genf2[[n_trans2]] <- flexsurvreg(formula=fitform2_t3,
data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == (n_trans2-0)),
dist = "genf")
for (i in 1:n_trans2){
km.lc2[[i]] <- survfit(formula= fitform_4s, data = subset(ms2_CONS_C1_SEP_2020_women_imputed_exp, trans == i))
}
endTime <- Sys.time()
paste0("Time in process: ");endTime - tiempo_antes_fits
## [1] "Time in process: "
## Time difference of 30.5549 mins
transition_label_4s<- c(`1`="Transition 1: Admission to Ther.Discharge",`2`="Transition 2: Admission to Discharge w/o Clinical Advice",`3`="Transition 3: Admission to Readmission",`4`="Transition 4: Ther.Discharge to Readmission", `5`="Transition 5: Discharge w/o Clinical Advice to Readmission")
startTime <- Sys.time()
layout(matrix(1:6, nc = 2, byrow = F))
for (i in 1:n_trans2){
plot(km.lc2[[i]], col="red", conf.int=F, xlim=c(0,12));
lines(fits_wei2[[i]], col="coral4", ci=F);
lines(fits_weiph2[[i]], col="navyblue", ci=F);
lines(fits_gomp2[[i]], col="lightpink", ci=F);
lines(fits_llogis2[[i]], col="gray25", ci=F);##A0A36D
lines(fits_gam2[[i]], col="darkorchid4", ci=F);##886894
lines(fits_ggam2[[i]], col="#496A72", ci=F);
lines(fits_logn2[[i]], col="gray70", ci=F);
lines(fits_exp2[[i]],col="#A0A36D", ci=F);
lines(fits_genf2[[i]],col="cadetblue", ci=F)
legend("bottomleft", legend = c("Kaplan-Meier","Weibull (AFT)", "Weibull (PH)", "Gompertz", "Log-logistic", "Gamma",
"Generalized gamma", "Lognormal", "Exponential", "Generalized F"), col =
c("red","coral4","navyblue","lightpink","gray25",#"#B89673",##886894
"darkorchid4","#496A72","gray70","#A0A36D","cadetblue"),
title = "Distributions", cex = .95, bty = "n", lty=1.2,lwd=3)# lty = 1:2,
title(main=transition_label_4s[[i]])
}
endTime <- Sys.time()
paste0("Time in process: ");endTime - startTime
## [1] "Time in process: "
## Time difference of 1.02408 secs
#23 min aprox.
#For more complicated models, users should specify what covariate values they want summaries for, rather than relying on the default
#</div>
options(warn=0)
Additionally, we compared the hazards of these distributions with non-parametric techniques, using a smoothed hazard function from right-censored data using kernel-based methods (as Incerti shows in his blog). The confidence interval around the predicted hazard function is estimated using bootstrapping methods of 10,000 iterations. Confidence intervals are obtained by sampling randomly from the asymptotic normal distribution of the maximum likelihood estimates and then taking quantiles (see, e.g. Mandel, 2013).
#Micha Mandel (2013) Simulation-Based Confidence Intervals for Functions With Complicated Derivatives, The American Statistician, 67:2, 76-81, DOI: 10.1080/00031305.2013.783880
# https://rpubs.com/martina_morris/testhazard
tiempo_antes_fits2<-Sys.time()
newtime2 = seq(from=1/24, to= 15, by=1/12)
#```{r fit,eval=T, echo=T, paged.print=TRUE, fig.height=13, fig.width=10, fig.align="center", results = 'asis'}
kernel_haz_est<-list()
kernel_haz<-list()
for (i in 1:n_trans){
library("muhaz")
kernel_haz_est[[i]] <- muhaz(ms_CONS_C1_SEP_2020_women_imputed_exp[which(ms_CONS_C1_SEP_2020_women_imputed_exp$trans==i),"time"],
ms_CONS_C1_SEP_2020_women_imputed_exp[which(ms_CONS_C1_SEP_2020_women_imputed_exp$trans==i),"status"])
kernel_haz[[i]] <- data.table(time = kernel_haz_est[[i]]$est.grid,
est = kernel_haz_est[[i]]$haz.est,
dist = "Kernel density")
}
haz_int_only<-
cbind(trans=rep(1:n_trans,each=nrow(kernel_haz[[i]])),
rbindlist(kernel_haz))
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#create list of survreg for different transitions
#<- flexsurvreg_list(fit1, fit2, fit3)
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
dists_wo_covs <- cbind.data.frame(covs=c(rep("fits_",9)),
formal=rep(c("Weibull (AFT)", "Weibull (PH)", "Gompertz", "Log-logistic", "Gamma",
"Generalized gamma", "Lognormal", "Exponential", "Generalized F"),1),
model=rep(c("wei", "weiph", "gomp", "llogis", "gam","ggam", "logn", "exp", "genf"),1))
dists_w_covs <- cbind.data.frame(covs=c(rep("fits_c_",9)),
formal=rep(c("Weibull (AFT)", "Weibull (PH)", "Gompertz", "Log-logistic", "Gamma",
"Generalized gamma", "Lognormal", "Exponential", "Generalized F"),1),
model=rep(c("wei", "weiph", "gomp", "llogis", "gam","ggam", "logn", "exp", "genf"),1))
#hr.exp <- round(exp(coef(get(paste0("fits_",dists[x,"model"]))[[y]])["groupGood"]),3)
#WO COVS
fitted_flexsurvreg2a<-data.frame()
fit_flexsurvreg2a<-data.frame()
for (y in 1:n_trans){
for (x in 1:nrow(dists_wo_covs)){
cat(paste0("#### Flexible Survival Model (w/o covs): ",
dists_wo_covs[x,"formal"], "; transition: ",y, "\n \n"))
#Return fitted survival, cumulative hazard or hazard at a series of times from a fitted model
#
mod_flexsurv<-paste0(dists_wo_covs[x,"covs"],dists_wo_covs[x,"model"])
#FITTED LINES
#Lines
est_by_time<-
data.table::data.table(summary(get(mod_flexsurv)[[y]], ci = F, t= newtime2, B=10000,type = "hazard", tidy=T))
#dataframe
fitted_flexsurvreg2a<-rbind.data.frame(fitted_flexsurvreg2a,
cbind.data.frame(dist= rep(dists_wo_covs[x,"formal"],),
trans=rep(y,),
est_by_time))
#
fit_flexsurvreg2a<-rbind(fit_flexsurvreg2a,
cbind(dist= dists_wo_covs[x,"formal"], trans=y,
covs="w/o covs",
AIC= get(mod_flexsurv)[[y]]$AIC,
Llik= get(mod_flexsurv)[[y]]$loglik,
npars= get(mod_flexsurv)[[y]]$npars,
pars= get(mod_flexsurv)[[y]]$AIC/2 + get(mod_flexsurv)[[y]]$loglik,
BIC= get(mod_flexsurv)[[y]]$loglik+ log(get(mod_flexsurv)[[y]]$N)* (get(mod_flexsurv)[[y]]$AIC/2 + get(mod_flexsurv)[[y]]$loglik)
#the BIC may not be appropriate if none of the candidate models are considered to be close to the ‘true’ model.
)
)
}
}
## #### Flexible Survival Model (w/o covs): Weibull (AFT); transition: 1
##
## #### Flexible Survival Model (w/o covs): Weibull (PH); transition: 1
##
## #### Flexible Survival Model (w/o covs): Gompertz; transition: 1
##
## #### Flexible Survival Model (w/o covs): Log-logistic; transition: 1
##
## #### Flexible Survival Model (w/o covs): Gamma; transition: 1
##
## #### Flexible Survival Model (w/o covs): Generalized gamma; transition: 1
##
## #### Flexible Survival Model (w/o covs): Lognormal; transition: 1
##
## #### Flexible Survival Model (w/o covs): Exponential; transition: 1
##
## #### Flexible Survival Model (w/o covs): Generalized F; transition: 1
##
## #### Flexible Survival Model (w/o covs): Weibull (AFT); transition: 2
##
## #### Flexible Survival Model (w/o covs): Weibull (PH); transition: 2
##
## #### Flexible Survival Model (w/o covs): Gompertz; transition: 2
##
## #### Flexible Survival Model (w/o covs): Log-logistic; transition: 2
##
## #### Flexible Survival Model (w/o covs): Gamma; transition: 2
##
## #### Flexible Survival Model (w/o covs): Generalized gamma; transition: 2
##
## #### Flexible Survival Model (w/o covs): Lognormal; transition: 2
##
## #### Flexible Survival Model (w/o covs): Exponential; transition: 2
##
## #### Flexible Survival Model (w/o covs): Generalized F; transition: 2
##
## #### Flexible Survival Model (w/o covs): Weibull (AFT); transition: 3
##
## #### Flexible Survival Model (w/o covs): Weibull (PH); transition: 3
##
## #### Flexible Survival Model (w/o covs): Gompertz; transition: 3
##
## #### Flexible Survival Model (w/o covs): Log-logistic; transition: 3
##
## #### Flexible Survival Model (w/o covs): Gamma; transition: 3
##
## #### Flexible Survival Model (w/o covs): Generalized gamma; transition: 3
##
## #### Flexible Survival Model (w/o covs): Lognormal; transition: 3
##
## #### Flexible Survival Model (w/o covs): Exponential; transition: 3
##
## #### Flexible Survival Model (w/o covs): Generalized F; transition: 3
##
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#Calculate error
haz_int_only1_binned<-
haz_int_only %>%
dplyr::mutate(time=ifelse(time<.05,.05,time)) %>%
dplyr::group_by(trans) %>%
dplyr::mutate(x_binned = cut(time,
breaks= seq(0,max(haz_int_only$time),.05))) %>%
dplyr::ungroup() %>%
dplyr::group_by(trans, dist, x_binned) %>%
dplyr::summarise(mean_time=mean(time,na.rm=T),mean_est=mean(est,na.rm=T)) %>%
dplyr::ungroup() %>%
dplyr::select(-dist)
## `summarise()` regrouping output by 'trans', 'dist' (override with `.groups` argument)
#180 rows vs. 303 in haz int only
fitted_flexsurvreg2a_binned<-
fitted_flexsurvreg2a[,c("dist","trans","time","est")] %>%
dplyr::filter(time<=max(haz_int_only$time)) %>%
dplyr::mutate(time=ifelse(time<.05,.05,time)) %>%
dplyr::group_by(trans, dist) %>%
dplyr::mutate(x_binned = cut(time,
breaks= seq(0,max(haz_int_only$time),.05))) %>%
dplyr::ungroup() %>%
dplyr::group_by(trans, dist, x_binned) %>%
dplyr::summarise(mean_time=mean(time,na.rm=T),mean_est=mean(est,na.rm=T)) %>%
dplyr::ungroup()
## `summarise()` regrouping output by 'trans', 'dist' (override with `.groups` argument)
fitted_flexsurvreg2a_binned_mix<-
fitted_flexsurvreg2a_binned %>%
dplyr::left_join(haz_int_only1_binned, by=c("trans", "x_binned")) %>%
dplyr::select(-mean_time.y,-mean_time.x) %>%
dplyr::rename("mean_est_flexsurv"="mean_est.x","mean_est_cumhaz"="mean_est.y")
db_for_apply_rmse_a<-
cbind.data.frame(
dist=rep(c("Weibull (AFT)", "Weibull (PH)", "Gompertz", "Log-logistic", "Gamma",
"Generalized gamma", "Lognormal", "Exponential", "Generalized F"),3),
trans=rep(c(1:3),each=9))
rmse_comp_fits_2a<- data.frame()
for(i in 1:nrow(db_for_apply_rmse_a)){
rmse<- Metrics::rmse(subset(fitted_flexsurvreg2a_binned_mix[complete.cases(fitted_flexsurvreg2a_binned_mix),],
dist==db_for_apply_rmse_a[i,"dist"] &
trans==db_for_apply_rmse_a[i,"trans"])$mean_est_flexsurv,
subset(fitted_flexsurvreg2a_binned_mix[complete.cases(fitted_flexsurvreg2a_binned_mix),],
dist==db_for_apply_rmse_a[i,"dist"] &
trans==db_for_apply_rmse_a[i,"trans"])$mean_est_cumhaz)
rmse_comp_fits_2a<- rbind(rmse_comp_fits_2a,cbind(dist=db_for_apply_rmse_a[i,"dist"],
trans=db_for_apply_rmse_a[i,"trans"],
rmse=rmse))
}
rmse_comp_fits_2a<-
rmse_comp_fits_2a %>%
dplyr::mutate(rmse=round(as.numeric(rmse),4)) %>%
dplyr::arrange(trans,rmse)
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
haz <- rbind(haz_int_only, fitted_flexsurvreg2a)
#brown burlywood3 blueviolet blue4 cadetblue4
haz_plot_int_only<-
haz %>%
dplyr::mutate(dist=factor(dist,levels=c("Kernel density",dists_wo_covs$formal))) %>%
ggplot()+
geom_line(aes(time, est, color=dist),size=1)+
#geom_ribbon(aes(ymin=lcl, ymax=ucl), alpha=.5, linetype=0) +
scale_color_manual(name="Distributions", values = c("black","#f54b96","#00e9b1","#69b763",
"#166000","#b27ff9","#fa863b","#013eab","#a7aa48","#b34b40","darkred"))+
#c("black",brewer.pal(n = 9, name = 'Paired')))+
#c("#112A60","#085754","#D3A347","#4F3C91","red","#112A60","#085754","#8F630D","#251363")) +
facet_wrap(~trans,labeller = labeller(trans = transition_label))+
sjPlot::theme_sjplot2()+
theme(legend.position="bottom",
strip.background = element_rect(fill = "white", colour = "white"))+
scale_x_continuous(breaks = seq(1, 15, 2))+
#theme(axis.text.x = element_blank(),
# panel.grid.major = element_blank(),
# panel.grid.minor = element_blank()) +
labs(y="Hazard",x="Time (years)")
fit_plot_int_only<-
fit_flexsurvreg2a %>%
melt(id.vars=c("dist","trans","covs","npars","pars")) %>%
dplyr::filter(variable!="Llik") %>%
dplyr::mutate(dist=factor(dist, levels= dists_wo_covs$formal),
value=as.numeric(value)) %>%
ggplot(aes(dist, value, fill=variable))+
geom_bar(stat= "identity")+
#geom_ribbon(aes(ymin=lcl, ymax=ucl), alpha=.5, linetype=0) +
scale_fill_manual(name="Indices", values = c("gray30","gray70"))+
#c("#112A60","#085754","#D3A347","#4F3C91","red","#112A60","#085754","#8F630D","#251363")) +
facet_wrap(~trans,labeller = labeller(trans = transition_label))+
sjPlot::theme_sjplot2()+
theme(legend.position="bottom",
axis.text.x= element_text(angle=45, vjust=0.5),
strip.background = element_blank(),
strip.text.x = element_blank(),
axis.title.x=element_blank())+
#theme(axis.text.x = element_blank(),
# panel.grid.major = element_blank(),
# panel.grid.minor = element_blank())+
labs(y="Value")
haz_plot_int_only
if(no_mostrar==1){
jpeg("C:/Users/andre/Desktop/SUD_CL/eso3.jpg", height=14, width= 10, res= 96, units = "in")
haz_plot_int_only
dev.off()
}
Visual assessments seems to show that for the first 2 transitions, the Gompertz distribution may reproduce more adequately the extrapolation of hazards posterior to 5 years, and Log-normal may reproduce adequately the third transition. However, Gompertz, Generalized gamma and Log-logistic show lower AICs for each transitions. Must consider that differences between distributions are really low.
For the first two transitions, Generalized gamma distributions reproduced more accurately the expected number of events in the interval between 0 and six months, compared to the other models that tended to overestimate hazards around the first months. The only exception is in the third transition, being not capable to capture rates in the first year. Gompertz extrapolations tend to produce more realistic estimations after 5 years, compared to a Generalized gamma model which appear to extrapolate higher readmission rates after 5 years.
Gompertz (Mean RMSE= 0.046; AIC= 33,468.79; Mean RMSE(2)= 0.0121; AIC(2)= 31,498.31) and Generalized gamma (Mean RMSE= 0.046; AIC= 33,468.79;Mean RMSE(2)= 0.0121; AIC(2)= 31,498.31) distributions showed better indices for the first 2 transitions, while Log-logistic showed better indices in the third transition (Mean RMSE= 0.0159; AIC= 7,439.743)
We looked over estimations and confidence intervals (made by resamples of 10,000 iterations), compared to the mentioned smooth estimate of the hazard function for censored data. The main difference is these models contain covariates of interest for the study. To extrapolate confidence intervals, we defined a different model for the third transition, including the time of arrival to the intermediate state.
options(warn=-1)
kernel_haz_est2a<-list()
kernel_haz_est2b<-list()
kernel_haz2a<-list()
kernel_haz2b<-list()
for (i in 1:n_trans){
library("muhaz")
kernel_haz_est2a[[i]] <- muhaz(ms_CONS_C1_SEP_2020_women_imputed_exp[which(ms_CONS_C1_SEP_2020_women_imputed_exp$trans==i &
ms_CONS_C1_SEP_2020_women_imputed_exp$tipo_de_programa_2==1),"time"],
ms_CONS_C1_SEP_2020_women_imputed_exp[which(ms_CONS_C1_SEP_2020_women_imputed_exp$trans==i &
ms_CONS_C1_SEP_2020_women_imputed_exp$tipo_de_programa_2==1),"status"])
kernel_haz2a[[i]] <- data.table(time = kernel_haz_est2a[[i]]$est.grid,
est = kernel_haz_est2a[[i]]$haz.est,
dist = "Kernel density")
kernel_haz_est2b[[i]] <- muhaz(ms_CONS_C1_SEP_2020_women_imputed_exp[which(ms_CONS_C1_SEP_2020_women_imputed_exp$trans==i &
ms_CONS_C1_SEP_2020_women_imputed_exp$tipo_de_programa_2==0),"time"],
ms_CONS_C1_SEP_2020_women_imputed_exp[which(ms_CONS_C1_SEP_2020_women_imputed_exp$trans==i &
ms_CONS_C1_SEP_2020_women_imputed_exp$tipo_de_programa_2==0),"status"])
kernel_haz2b[[i]] <- data.table(time = kernel_haz_est2b[[i]]$est.grid,
est = kernel_haz_est2b[[i]]$haz.est,
dist = "Kernel density")
}
haz_int_only2<-
rbind(cbind(trans=rep(1:n_trans,each=nrow(kernel_haz2a[[i]])),
tipo_programa=rep(1,nrow(kernel_haz2a[[i]])),
rbindlist(kernel_haz2a)),
cbind(trans=rep(1:n_trans,each=nrow(kernel_haz2b[[i]])),
tipo_programa=rep(0,nrow(kernel_haz2b[[i]])),
rbindlist(kernel_haz2b)))
# Database to contrast adjustments
newdat2 <- data.table::data.table(tipo_de_programa_2= factor(c(rep(1,1),rep(0,1))),
#comp_status= factor(rep(c("Therapeutic discharge","Discharge without clinical advice"),2)),
edad_al_ing_grupos= factor(rep("50+",2)),
escolaridad_rec= factor(rep("1-More than high school",2)),
sus_principal_mod= factor(rep("Marijuana",2)),
freq_cons_sus_prin= factor(rep("2 to 3 days a week",2)),
compromiso_biopsicosocial= factor(rep("1-Mild",2)),
tenencia_de_la_vivienda_mod= factor(rep("Owner/Transferred dwellings/Pays Dividends",2)),
num_otras_sus_mod= factor(rep("No additional substance",2)),
numero_de_hijos_mod_rec= factor(rep("No",2)),
tipo_de_plan_res= factor(rep("Outpatient",2)))
newdat2_t3 <- data.table::data.table(tipo_de_programa_2= factor(c(rep(1,1),rep(0,1))),
#comp_status= factor(rep(c("Therapeutic discharge","Discharge without clinical advice"),2)),
edad_al_ing_grupos= factor(rep("50+",2)),
escolaridad_rec= factor(rep("1-More than high school",2)),
sus_principal_mod= factor(rep("Marijuana",2)),
freq_cons_sus_prin= factor(rep("2 to 3 days a week",2)),
compromiso_biopsicosocial= factor(rep("1-Mild",2)),
tenencia_de_la_vivienda_mod= factor(rep("Owner/Transferred dwellings/Pays Dividends",2)),
num_otras_sus_mod= factor(rep("No additional substance",2)),
numero_de_hijos_mod_rec= factor(rep("No",2)),
tipo_de_plan_res= factor(rep("Outpatient",2)),
arrival=0)
## W COVS
fitted_flexsurvreg2b<-data.frame()
fit_flexsurvreg2b<-data.frame()
for (y in 1:n_trans){
for (x in 1:nrow(dists_w_covs)){
cat(paste0("#### Flexible Survival Model (w/ covs): ",
dists_w_covs[x,"formal"], "; transition: ",y, "\n \n"))
#Return fitted survival, cumulative hazard or hazard at a series of times from a fitted model
#
mod_flexsurv<-paste0(dists_w_covs[x,"covs"],dists_w_covs[x,"model"])
#FITTED LINES
#Lines
if(y==n_trans){
est_by_time<- data.table::data.table(summary(get(mod_flexsurv)[[y]],newdata=newdat2_t3,t=newtime2,B=10000,type="hazard",tidy=T))} else{
est_by_time<- data.table::data.table(cbind.data.frame(summary(get(mod_flexsurv)[[y]],newdata=newdat2,t=newtime2,B=10000,type="hazard",tidy=T),arrival=rep(0,)))
}
#"survival" for survival probabilities.
#"cumhaz" for cumulative hazards.
#"hazard" for hazards.
#"rmst" for restricted mean survival.
#"mean" for mean survival.
#"median" for median survival (alternative to type="quantile" with quantiles=0.5).
#"quantile" for quantiles of the survival time distribution.
#"link" for the fitted value of the location parameter (i.e. the "linear predictor")
#dataframe
fitted_flexsurvreg2b<-rbind.data.frame(fitted_flexsurvreg2b,
cbind.data.frame(dist= rep(dists_w_covs[x,"formal"],),
hr= round(exp(coef(get(mod_flexsurv)[[y]])["factor(tipo_de_programa_2)1"]),3),
trans=rep(y,),
est_by_time))
#
fit_flexsurvreg2b<-rbind(fit_flexsurvreg2b,
cbind(dist= dists_w_covs[x,"formal"],
trans=y,
covs="w/ covs",
AIC= get(mod_flexsurv)[[y]]$AIC,
Llik= get(mod_flexsurv)[[y]]$loglik,
npars= get(mod_flexsurv)[[y]]$npars,
pars= get(mod_flexsurv)[[y]]$AIC/2 + get(mod_flexsurv)[[y]]$loglik,
BIC= get(mod_flexsurv)[[y]]$loglik+ log(get(mod_flexsurv)[[y]]$N)* (get(mod_flexsurv)[[y]]$AIC/2 + get(mod_flexsurv)[[y]]$loglik)
#the BIC may not be appropriate if none of the candidate models are considered to be close to the ‘true’ model.
)
)
}
}
## #### Flexible Survival Model (w/ covs): Weibull (AFT); transition: 1
##
## #### Flexible Survival Model (w/ covs): Weibull (PH); transition: 1
##
## #### Flexible Survival Model (w/ covs): Gompertz; transition: 1
##
## #### Flexible Survival Model (w/ covs): Log-logistic; transition: 1
##
## #### Flexible Survival Model (w/ covs): Gamma; transition: 1
##
## #### Flexible Survival Model (w/ covs): Generalized gamma; transition: 1
##
## #### Flexible Survival Model (w/ covs): Lognormal; transition: 1
##
## #### Flexible Survival Model (w/ covs): Exponential; transition: 1
##
## #### Flexible Survival Model (w/ covs): Generalized F; transition: 1
##
## #### Flexible Survival Model (w/ covs): Weibull (AFT); transition: 2
##
## #### Flexible Survival Model (w/ covs): Weibull (PH); transition: 2
##
## #### Flexible Survival Model (w/ covs): Gompertz; transition: 2
##
## #### Flexible Survival Model (w/ covs): Log-logistic; transition: 2
##
## #### Flexible Survival Model (w/ covs): Gamma; transition: 2
##
## #### Flexible Survival Model (w/ covs): Generalized gamma; transition: 2
##
## #### Flexible Survival Model (w/ covs): Lognormal; transition: 2
##
## #### Flexible Survival Model (w/ covs): Exponential; transition: 2
##
## #### Flexible Survival Model (w/ covs): Generalized F; transition: 2
##
## #### Flexible Survival Model (w/ covs): Weibull (AFT); transition: 3
##
## #### Flexible Survival Model (w/ covs): Weibull (PH); transition: 3
##
## #### Flexible Survival Model (w/ covs): Gompertz; transition: 3
##
## #### Flexible Survival Model (w/ covs): Log-logistic; transition: 3
##
## #### Flexible Survival Model (w/ covs): Gamma; transition: 3
##
## #### Flexible Survival Model (w/ covs): Generalized gamma; transition: 3
##
## #### Flexible Survival Model (w/ covs): Lognormal; transition: 3
##
## #### Flexible Survival Model (w/ covs): Exponential; transition: 3
##
## #### Flexible Survival Model (w/ covs): Generalized F; transition: 3
##
tipo_programa_label<- c(`0`="General Population",`1`="Women specific")
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
if(no_mostrar==1){
fitted_flexsurvreg2b %>%
dplyr::group_by(trans) %>%
summarise(mean(ucl,na.rm=T))
}
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#Calculate error
haz_int_only2_binned<-
haz_int_only2 %>%
dplyr::mutate(time=ifelse(time<.5,.5,time)) %>%
dplyr::group_by(trans, tipo_programa) %>%
dplyr::mutate(x_binned = cut(time,
breaks= seq(0,max(haz_int_only2$time),.5))) %>%
dplyr::ungroup() %>%
dplyr::group_by(trans, tipo_programa, dist, x_binned) %>%
dplyr::summarise(mean_time=mean(time,na.rm=T),mean_est=mean(est,na.rm=T)) %>%
dplyr::ungroup() %>%
dplyr::rename("tipo_de_programa_2"="tipo_programa") %>%
dplyr::mutate(tipo_de_programa_2=factor(tipo_de_programa_2))%>%
dplyr::select(-dist)
## `summarise()` regrouping output by 'trans', 'tipo_programa', 'dist' (override with `.groups` argument)
fitted_flexsurvreg2b_binned<-
fitted_flexsurvreg2b[,c("dist","trans","time","est","factor(tipo_de_programa_2)")] %>%
dplyr::rename("tipo_de_programa_2"="factor(tipo_de_programa_2)") %>%
dplyr::filter(time<=max(haz_int_only2$time)) %>%
dplyr::mutate(time=ifelse(time<.5,.5,time)) %>%
dplyr::group_by(trans, dist, tipo_de_programa_2) %>%
dplyr::mutate(x_binned = cut(time,
breaks= seq(0,max(haz_int_only2$time),.5))) %>%
dplyr::ungroup() %>%
dplyr::group_by(trans, dist, tipo_de_programa_2, x_binned) %>%
dplyr::summarise(mean_time=mean(time,na.rm=T),mean_est=mean(est,na.rm=T)) %>%
dplyr::ungroup()
## `summarise()` regrouping output by 'trans', 'dist', 'tipo_de_programa_2' (override with `.groups` argument)
fitted_flexsurvreg2b_binned_mix<-
fitted_flexsurvreg2b_binned %>%
dplyr::left_join(haz_int_only2_binned, by=c("trans","tipo_de_programa_2", "x_binned")) %>%
dplyr::select(-mean_time.y,-mean_time.x) %>%
dplyr::rename("mean_est_flexsurv"="mean_est.x","mean_est_cumhaz"="mean_est.y")
db_for_apply_rmse<-
cbind.data.frame(tipo_prog=rep(c("0","1"),each=9,3),
dist=rep(c("Weibull (AFT)", "Weibull (PH)", "Gompertz", "Log-logistic", "Gamma",
"Generalized gamma", "Lognormal", "Exponential", "Generalized F"),2*3),
trans=rep(c(1:3),each=9*2))
rmse_comp_fits_2b<- data.frame()
for(i in 1:nrow(db_for_apply_rmse)){
rmse<- Metrics::rmse(subset(fitted_flexsurvreg2b_binned_mix[complete.cases(fitted_flexsurvreg2b_binned_mix),],
tipo_de_programa_2==db_for_apply_rmse[i,"tipo_prog"] &
dist==db_for_apply_rmse[i,"dist"] &
trans==db_for_apply_rmse[i,"trans"])$mean_est_flexsurv,
subset(fitted_flexsurvreg2b_binned_mix[complete.cases(fitted_flexsurvreg2b_binned_mix),],
tipo_de_programa_2==db_for_apply_rmse[i,"tipo_prog"] &
dist==db_for_apply_rmse[i,"dist"] &
trans==db_for_apply_rmse[i,"trans"])$mean_est_cumhaz)
rmse_comp_fits_2b<- rbind(rmse_comp_fits_2b,cbind(dist=db_for_apply_rmse[i,"dist"],
tipo_prog=db_for_apply_rmse[i,"tipo_prog"],
trans=db_for_apply_rmse[i,"trans"],
rmse=rmse))
}
rmse_comp_fits_2b<-
rmse_comp_fits_2b %>%
tidyr::pivot_wider(names_from = tipo_prog, values_from = rmse) %>%
dplyr::rename("gp"="0","we"="1") %>%
dplyr::mutate(gp=as.numeric(gp),we=as.numeric(we)) %>%
dplyr::mutate(mean_rmse=rowSums(.[3:4])/2) %>%
dplyr::arrange(trans,dist, mean_rmse)%>%
dplyr::mutate(mean_rmse=round(mean_rmse,4))
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#http://colorschemedesigner.com/csd-3.5/#
#https://www.r-graph-gallery.com/42-colors-names.html
plot_fitted_flexsurvreg2b<-
fitted_flexsurvreg2b[,c("dist","trans","time","est","lcl","ucl","factor(tipo_de_programa_2)")] %>%
rbind(cbind(dplyr::rename(haz_int_only2, "factor(tipo_de_programa_2)"="tipo_programa"),
"lcl"=haz_int_only2$est,"ucl"=haz_int_only2$est)) %>%
dplyr::rename("tipo_de_programa_2"="factor(tipo_de_programa_2)") %>%
dplyr::mutate(tipo_de_programa_2=factor(tipo_de_programa_2),
dist=factor(dist, levels=c("Kernel density",dists_w_covs$formal)),
trans=factor(trans)) %>%
#dplyr::group_by(tipo_de_programa_2,dist,time,trans) %>% summarise(n=n()) %>%
ggplot()+
geom_line(aes(time, est, color=dist),size=1)+
geom_ribbon(aes(x = time, ymin = lcl, ymax = ucl, fill = dist), alpha = 0.2)+
#geom_ribbon(aes(ymin=lcl, ymax=ucl), alpha=.5, linetype=0) +
scale_color_manual(name="Distributions", values = c("red","gray20","darkolivegreen","#D3A347","#4F3C91","coral4","#085754","lightpink","turquoise","navyblue")) +
scale_fill_manual(name="Distributions", values = c("red","gray20","darkolivegreen","#D3A347","#4F3C91","coral4","#085754","lightpink","turquoise","navyblue")) +
facet_wrap(tipo_de_programa_2~trans,labeller = labeller(trans = transition_label, tipo_de_programa_2=tipo_programa_label))+
sjPlot::theme_sjplot2()+
#ylim(0,.3)+
scale_x_continuous(breaks = seq(0, 15, 2))+
theme(legend.position="bottom",
strip.background = element_rect(fill = "white", colour = "white"))+
labs(y="Hazard",x="Time (years)",caption="Note. Kernel density, stratified by type of program")
plot_fitted_flexsurvreg2b
## http://www.statistica.it/gianluca/teaching/r-hta-workshop/2019/Bullement.pdf
tiempo_despues_fits2<-Sys.time()
paste0("Time in process: ");tiempo_despues_fits2-tiempo_antes_fits2
## [1] "Time in process: "
## Time difference of 2.248819 mins
#13 minutos aprox. en DELL
options(warn=0)
if(no_mostrar==1){
jpeg("C:/Users/andre/Desktop/SUD_CL/eso4.jpg", height=10, width= 10, res= 96, units = "in")
plot_fitted_flexsurvreg2b
dev.off()
}
There was very little to choose between the distributions, but we aimed to choose the distribution that achieve the best balance of a reasonable fit to the observed data and a sensible extrapolation. A Gompertz was chosen for the first transition (Mean RMSE= 0.0352; AIC= 31,396.44). The distribution for the second transition was Generalized gamma (Mean RMSE= 0.0188; AIC= 30,796.29). We decided to choose the third transition as a Log-logistic distribution because it shows better indices (Mean RMSE= 0.0351; AIC= 7,089.797).
4 states
#Micha Mandel (2013) Simulation-Based Confidence Intervals for Functions With Complicated Derivatives, The American Statistician, 67:2, 76-81, DOI: 10.1080/00031305.2013.783880
# https://rpubs.com/martina_morris/testhazard
tiempo_antes_fits2<-Sys.time()
newtime2 = seq(from=1/24, to= 15, by=1/12)
mam
## Error in eval(expr, envir, enclos): objeto 'mam' no encontrado
kernel_haz_est_4s<-list()
kernel_haz_4s<-list()
for (i in 1:n_trans2){
library("muhaz")
kernel_haz_est_4s[[i]] <- muhaz(ms2_CONS_C1_SEP_2020_women_imputed_exp[which(ms2_CONS_C1_SEP_2020_women_imputed_exp$trans==i),"time"],
ms2_CONS_C1_SEP_2020_women_imputed_exp[which(ms2_CONS_C1_SEP_2020_women_imputed_exp$trans==i),"status"])
kernel_haz_4s[[i]] <- data.table(time = kernel_haz_est_4s[[i]]$est.grid,
est = kernel_haz_est_4s[[i]]$haz.est,
dist = "Kernel density")
}
haz_int_only_4s<-
cbind(trans=rep(1:n_trans2,each=nrow(kernel_haz_4s[[i]])),
rbindlist(kernel_haz_4s))
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#create list of survreg for different transitions
#<- flexsurvreg_list(fit1, fit2, fit3)
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
dists_wo_covs_4s <- cbind.data.frame(covs=c(rep("fits_",9)),
formal=rep(c("Weibull (AFT)", "Weibull (PH)", "Gompertz", "Log-logistic", "Gamma",
"Generalized gamma", "Lognormal", "Exponential", "Generalized F"),1),
model=rep(c("wei2", "weiph2", "gomp2", "llogis2", "gam2","ggam2", "logn2", "exp2", "genf2"),1))
dists_w_covs_4s <- cbind.data.frame(covs=c(rep("fits_c_",9)),
formal=rep(c("Weibull (AFT)", "Weibull (PH)", "Gompertz", "Log-logistic", "Gamma",
"Generalized gamma", "Lognormal", "Exponential", "Generalized F"),1),
model=rep(c("wei2", "weiph2", "gomp2", "llogis2", "gam2","ggam2", "logn2", "exp2", "genf2"),1))
#WO COVS
fitted_flexsurvreg2a_4s<-data.frame()
fit_flexsurvreg2a_4s<-data.frame()
for (y in 1:n_trans2){
for (x in 1:nrow(dists_wo_covs_4s)){
cat(paste0("#### Flexible Survival Model (w/o covs): ",
dists_wo_covs_4s[x,"formal"], "; transition: ",y, "\n \n"))
#Return fitted survival, cumulative hazard or hazard at a series of times from a fitted model
#
mod_flexsurv2<-paste0(dists_wo_covs_4s[x,"covs"],dists_wo_covs_4s[x,"model"])
#FITTED LINES
#Lines
est_by_time2<-
data.table::data.table(summary(get(mod_flexsurv2)[[y]], ci = F, t= newtime2, B=10000,type = "hazard", tidy=T))
#dataframe
fitted_flexsurvreg2a_4s<-rbind.data.frame(fitted_flexsurvreg2a_4s,
cbind.data.frame(dist= rep(dists_wo_covs_4s[x,"formal"],),
trans=rep(y,),
est_by_time2))
#
fit_flexsurvreg2a_4s<-rbind(fit_flexsurvreg2a_4s,
cbind(dist= dists_wo_covs_4s[x,"formal"], trans=y,
covs="w/o covs",
AIC= get(mod_flexsurv2)[[y]]$AIC,
Llik= get(mod_flexsurv2)[[y]]$loglik,
npars= get(mod_flexsurv2)[[y]]$npars,
pars= get(mod_flexsurv2)[[y]]$AIC/2 + get(mod_flexsurv2)[[y]]$loglik,
BIC= get(mod_flexsurv2)[[y]]$loglik+ log(get(mod_flexsurv2)[[y]]$N)* (get(mod_flexsurv2)[[y]]$AIC/2 + get(mod_flexsurv2)[[y]]$loglik)
#the BIC may not be appropriate if none of the candidate models are considered to be close to the ‘true’ model.
)
)
}
}
## #### Flexible Survival Model (w/o covs): Weibull (AFT); transition: 1
##
## #### Flexible Survival Model (w/o covs): Weibull (PH); transition: 1
##
## #### Flexible Survival Model (w/o covs): Gompertz; transition: 1
##
## #### Flexible Survival Model (w/o covs): Log-logistic; transition: 1
##
## #### Flexible Survival Model (w/o covs): Gamma; transition: 1
##
## #### Flexible Survival Model (w/o covs): Generalized gamma; transition: 1
##
## #### Flexible Survival Model (w/o covs): Lognormal; transition: 1
##
## #### Flexible Survival Model (w/o covs): Exponential; transition: 1
##
## #### Flexible Survival Model (w/o covs): Generalized F; transition: 1
##
## #### Flexible Survival Model (w/o covs): Weibull (AFT); transition: 2
##
## #### Flexible Survival Model (w/o covs): Weibull (PH); transition: 2
##
## #### Flexible Survival Model (w/o covs): Gompertz; transition: 2
##
## #### Flexible Survival Model (w/o covs): Log-logistic; transition: 2
##
## #### Flexible Survival Model (w/o covs): Gamma; transition: 2
##
## #### Flexible Survival Model (w/o covs): Generalized gamma; transition: 2
##
## #### Flexible Survival Model (w/o covs): Lognormal; transition: 2
##
## #### Flexible Survival Model (w/o covs): Exponential; transition: 2
##
## #### Flexible Survival Model (w/o covs): Generalized F; transition: 2
##
## #### Flexible Survival Model (w/o covs): Weibull (AFT); transition: 3
##
## #### Flexible Survival Model (w/o covs): Weibull (PH); transition: 3
##
## #### Flexible Survival Model (w/o covs): Gompertz; transition: 3
##
## #### Flexible Survival Model (w/o covs): Log-logistic; transition: 3
##
## #### Flexible Survival Model (w/o covs): Gamma; transition: 3
##
## #### Flexible Survival Model (w/o covs): Generalized gamma; transition: 3
##
## #### Flexible Survival Model (w/o covs): Lognormal; transition: 3
##
## #### Flexible Survival Model (w/o covs): Exponential; transition: 3
##
## #### Flexible Survival Model (w/o covs): Generalized F; transition: 3
##
## #### Flexible Survival Model (w/o covs): Weibull (AFT); transition: 4
##
## #### Flexible Survival Model (w/o covs): Weibull (PH); transition: 4
##
## #### Flexible Survival Model (w/o covs): Gompertz; transition: 4
##
## #### Flexible Survival Model (w/o covs): Log-logistic; transition: 4
##
## #### Flexible Survival Model (w/o covs): Gamma; transition: 4
##
## #### Flexible Survival Model (w/o covs): Generalized gamma; transition: 4
##
## #### Flexible Survival Model (w/o covs): Lognormal; transition: 4
##
## #### Flexible Survival Model (w/o covs): Exponential; transition: 4
##
## #### Flexible Survival Model (w/o covs): Generalized F; transition: 4
##
## #### Flexible Survival Model (w/o covs): Weibull (AFT); transition: 5
##
## #### Flexible Survival Model (w/o covs): Weibull (PH); transition: 5
##
## #### Flexible Survival Model (w/o covs): Gompertz; transition: 5
##
## #### Flexible Survival Model (w/o covs): Log-logistic; transition: 5
##
## #### Flexible Survival Model (w/o covs): Gamma; transition: 5
##
## #### Flexible Survival Model (w/o covs): Generalized gamma; transition: 5
##
## #### Flexible Survival Model (w/o covs): Lognormal; transition: 5
##
## #### Flexible Survival Model (w/o covs): Exponential; transition: 5
##
## #### Flexible Survival Model (w/o covs): Generalized F; transition: 5
##
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#Calculate error
haz_int_only1_4s_binned<-
haz_int_only_4s %>%
dplyr::mutate(time=ifelse(time<.05,.05,time)) %>%
dplyr::group_by(trans) %>%
dplyr::mutate(x_binned = cut(time,
breaks= seq(0,max(haz_int_only_4s$time),.05))) %>%
dplyr::ungroup() %>%
dplyr::group_by(trans, dist, x_binned) %>%
dplyr::summarise(mean_time=mean(time,na.rm=T),mean_est=mean(est,na.rm=T)) %>%
dplyr::ungroup() %>%
dplyr::select(-dist)
## `summarise()` regrouping output by 'trans', 'dist' (override with `.groups` argument)
#180 rows vs. 303 in haz int only
fitted_flexsurvreg2a_4s_binned<-
fitted_flexsurvreg2a_4s[,c("dist","trans","time","est")] %>%
dplyr::filter(time<=max(haz_int_only_4s$time)) %>%
dplyr::mutate(time=ifelse(time<.05,.05,time)) %>%
dplyr::group_by(trans, dist) %>%
dplyr::mutate(x_binned = cut(time,
breaks= seq(0,max(haz_int_only_4s$time),.05))) %>%
dplyr::ungroup() %>%
dplyr::group_by(trans, dist, x_binned) %>%
dplyr::summarise(mean_time=mean(time,na.rm=T),mean_est=mean(est,na.rm=T)) %>%
dplyr::ungroup()
## `summarise()` regrouping output by 'trans', 'dist' (override with `.groups` argument)
fitted_flexsurvreg2a_4s_binned_mix<-
fitted_flexsurvreg2a_4s_binned %>%
dplyr::left_join(haz_int_only1_4s_binned, by=c("trans", "x_binned")) %>%
dplyr::select(-mean_time.y,-mean_time.x) %>%
dplyr::rename("mean_est_flexsurv"="mean_est.x","mean_est_cumhaz"="mean_est.y")
db_for_apply_rmse_c<-
cbind.data.frame(
dist=rep(c("Weibull (AFT)", "Weibull (PH)", "Gompertz", "Log-logistic", "Gamma",
"Generalized gamma", "Lognormal", "Exponential", "Generalized F"),5),
trans=rep(c(1:5),each=9))
rmse_comp_fits_2c<- data.frame()
for(i in 1:nrow(db_for_apply_rmse_c)){
rmse<- Metrics::rmse(subset(fitted_flexsurvreg2a_4s_binned_mix[complete.cases(fitted_flexsurvreg2a_4s_binned_mix),],
dist==db_for_apply_rmse_c[i,"dist"] &
trans==db_for_apply_rmse_c[i,"trans"])$mean_est_flexsurv,
subset(fitted_flexsurvreg2a_4s_binned_mix[complete.cases(fitted_flexsurvreg2a_4s_binned_mix),],
dist==db_for_apply_rmse_c[i,"dist"] &
trans==db_for_apply_rmse_c[i,"trans"])$mean_est_cumhaz)
rmse_comp_fits_2c<- rbind(rmse_comp_fits_2c,cbind(dist=db_for_apply_rmse_c[i,"dist"],
trans=db_for_apply_rmse_c[i,"trans"],
rmse=rmse))
}
rmse_comp_fits_2c<-
rmse_comp_fits_2c %>%
dplyr::mutate(rmse=round(as.numeric(rmse),4)) %>%
dplyr::arrange(trans,rmse)
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
haz_4s <- rbind(haz_int_only_4s, fitted_flexsurvreg2a_4s)
#brown burlywood3 blueviolet blue4 cadetblue4
haz_plot_4s_int_only<-
haz_4s %>%
dplyr::mutate(dist=factor(dist,levels=c("Kernel density",dists_wo_covs_4s$formal))) %>%
ggplot()+
geom_line(aes(time, est, color=dist),size=1)+
#geom_ribbon(aes(ymin=lcl, ymax=ucl), alpha=.5, linetype=0) +
scale_color_manual(name="Distributions", values = c("black","#f54b96","#00e9b1","#69b763",
"#166000","#b27ff9","#fa863b","#013eab","#a7aa48","#b34b40","darkred"))+
#c("black",brewer.pal(n = 9, name = 'Paired')))+
#c("#112A60","#085754","#D3A347","#4F3C91","red","#112A60","#085754","#8F630D","#251363")) +
facet_wrap(.~trans,labeller = labeller(trans = transition_label_4s), ncol=1, scales = "free_y")+
sjPlot::theme_sjplot2()+
theme(legend.position="bottom",
strip.background = element_rect(fill = "white", colour = "white"))+
scale_x_continuous(breaks = seq(1, 15, 2))+
#ylim(0,1.25)+
#theme(axis.text.x = element_blank(),
# panel.grid.major = element_blank(),
# panel.grid.minor = element_blank()) +
labs(y="Hazard",x="Time (years)")
fit_plot_int_4s_only<-
fit_flexsurvreg2a_4s %>%
melt(id.vars=c("dist","trans","covs","npars","pars")) %>%
dplyr::filter(variable!="Llik") %>%
dplyr::mutate(dist=factor(dist, levels= dists_wo_covs_4s$formal),
value=as.numeric(value)) %>%
ggplot(aes(dist, value, fill=variable))+
geom_bar(stat= "identity")+
#geom_ribbon(aes(ymin=lcl, ymax=ucl), alpha=.5, linetype=0) +
scale_fill_manual(name="Indices", values = c("gray30","gray70"))+
#c("#112A60","#085754","#D3A347","#4F3C91","red","#112A60","#085754","#8F630D","#251363")) +
facet_wrap(~trans,labeller = labeller(trans = transition_label_4s), ncol=1)+
sjPlot::theme_sjplot2()+
theme(legend.position="bottom",
axis.text.x= element_text(angle=45, vjust=0.5),
strip.background = element_blank(),
strip.text.x = element_blank(),
axis.title.x=element_blank())+
#theme(axis.text.x = element_blank(),
# panel.grid.major = element_blank(),
# panel.grid.minor = element_blank())+
labs(y="Value")
#grid.arrange(haz_plot_4s_int_only,fit_plot_int_4s_only, heights=c(2,1.5))
haz_plot_4s_int_only
if(no_mostrar==1){
jpeg("C:/Users/andre/Desktop/SUD_CL/eso9.jpg", height=17, width= 10, res= 96, units = "in")
haz_plot_4s_int_only
#grid.arrange(haz_plot_4s_int_only,fit_plot_int_4s_only, heights=c(2,1.5))
dev.off()
}
We selected the distribution of the first transition as Generalized gamma (Mean RMSE= 0.1664; AIC= NA). For the second transition, we selected the Gompertz distribution (Mean RMSE= 0.1082; AIC= 31,786.57). In the third transition we must observe that after the 14th year for the third transition, the Generalized gamma shows best estimated hazards, but increases over 1.25 to an abnormal projection of 14,615 at year 14 and after, and none of the distribution reproduced very well the hazards (Mean RMSE= 0.013; AIC= 6,181.992). For the fourth transition, Log-logistic distribution fitted better compared to the other distributions (Mean RMSE= 0.0152; AIC= 7,438.223). For the fifth transition, we selected the Lognormal distribution (Mean RMSE= 0.0202; AIC= 7,466.745).
We looked over estimations and confidence intervals (made by resamples of 10,000 iterations), compared to the mentioned smooth estimate of the hazard function for censored data. The main difference is these models contain covariates of interest for the study. To extrapolate confidence intervals, we defined a different model for the fourth and fifth transition.
options(warn=-1)
tiempo_antes_fits22<- Sys.time()
kernel_haz_est2a_4s<-list()
kernel_haz_est2b_4s<-list()
kernel_haz2a_4s<-list()
kernel_haz2b_4s<-list()
for (i in 1:n_trans2){
library("muhaz")
kernel_haz_est2a_4s[[i]] <- muhaz(ms2_CONS_C1_SEP_2020_women_imputed_exp[which(ms2_CONS_C1_SEP_2020_women_imputed_exp$trans==i &
ms2_CONS_C1_SEP_2020_women_imputed_exp$tipo_de_programa_2==1),"time"],
ms2_CONS_C1_SEP_2020_women_imputed_exp[which(ms2_CONS_C1_SEP_2020_women_imputed_exp$trans==i &
ms2_CONS_C1_SEP_2020_women_imputed_exp$tipo_de_programa_2==1),"status"])
kernel_haz2a_4s[[i]] <- data.table(time = kernel_haz_est2a_4s[[i]]$est.grid,
est = kernel_haz_est2a_4s[[i]]$haz.est,
dist = "Kernel density")
kernel_haz_est2b_4s[[i]] <- muhaz(ms2_CONS_C1_SEP_2020_women_imputed_exp[which(ms2_CONS_C1_SEP_2020_women_imputed_exp$trans==i &
ms2_CONS_C1_SEP_2020_women_imputed_exp$tipo_de_programa_2==0),"time"],
ms2_CONS_C1_SEP_2020_women_imputed_exp[which(ms2_CONS_C1_SEP_2020_women_imputed_exp$trans==i &
ms2_CONS_C1_SEP_2020_women_imputed_exp$tipo_de_programa_2==0),"status"])
kernel_haz2b_4s[[i]] <- data.table(time = kernel_haz_est2b_4s[[i]]$est.grid,
est = kernel_haz_est2b_4s[[i]]$haz.est,
dist = "Kernel density")
}
haz_int_only2_4s<-
rbind(cbind(trans=rep(1:n_trans2,each=nrow(kernel_haz2a_4s[[i]])),
tipo_programa=rep(1,nrow(kernel_haz2a_4s[[i]])),
rbindlist(kernel_haz2a_4s)),
cbind(trans=rep(1:n_trans2,each=nrow(kernel_haz2b_4s[[i]])),
tipo_programa=rep(0,nrow(kernel_haz2b_4s[[i]])),
rbindlist(kernel_haz2b_4s)))
# Database to contrast adjustments
newdat2_4s <- data.table::data.table(tipo_de_programa_2= factor(c(rep(1,1),rep(0,1))),
#comp_status= factor(rep(c("Therapeutic discharge","Discharge without clinical advice"),2)),
edad_al_ing_grupos= factor(rep("50+",2)),
escolaridad_rec= factor(rep("1-More than high school",2)),
sus_principal_mod= factor(rep("Marijuana",2)),
freq_cons_sus_prin= factor(rep("2 to 3 days a week",2)),
compromiso_biopsicosocial= factor(rep("1-Mild",2)),
tenencia_de_la_vivienda_mod= factor(rep("Owner/Transferred dwellings/Pays Dividends",2)),
num_otras_sus_mod= factor(rep("No additional substance",2)),
numero_de_hijos_mod_rec= factor(rep("No",2)),
tipo_de_plan_res= factor(rep("Outpatient",2)))
newdat2_t45 <- data.table::data.table(tipo_de_programa_2= factor(c(rep(1,1),rep(0,1))),
#comp_status= factor(rep(c("Therapeutic discharge","Discharge without clinical advice"),2)),
edad_al_ing_grupos= factor(rep("50+",2)),
escolaridad_rec= factor(rep("1-More than high school",2)),
sus_principal_mod= factor(rep("Marijuana",2)),
freq_cons_sus_prin= factor(rep("2 to 3 days a week",2)),
compromiso_biopsicosocial= factor(rep("1-Mild",2)),
tenencia_de_la_vivienda_mod= factor(rep("Owner/Transferred dwellings/Pays Dividends",2)),
num_otras_sus_mod= factor(rep("No additional substance",2)),
numero_de_hijos_mod_rec= factor(rep("No",2)),
tipo_de_plan_res= factor(rep("Outpatient",2)),
arrival=0)
## W COVS
fitted_flexsurvreg2b_4s<-data.frame()
fit_flexsurvreg2b_4s<-data.frame()
for (y in 1:n_trans2){
for (x in 1:nrow(dists_w_covs_4s)){
cat(paste0("#### Flexible Survival Model (w/ covs): ",
dists_w_covs[x,"formal"], "; transition: ",y, "\n \n"))
#Return fitted survival, cumulative hazard or hazard at a series of times from a fitted model
#
mod_flexsurv2<-paste0(dists_w_covs_4s[x,"covs"],dists_w_covs_4s[x,"model"])
#FITTED LINES
#Lines
if(y%in% c(4,5)){
est_by_time2<- data.table::data.table(summary(get(mod_flexsurv2)[[y]],newdata=newdat2_t45,t=newtime2,B=10000,type="hazard",tidy=T))} else{
est_by_time2<- data.table::data.table(cbind.data.frame(summary(get(mod_flexsurv2)[[y]],newdata=newdat2_4s,t=newtime2,B=10000,type="hazard",tidy=T),arrival=rep(0,)))
}
#"survival" for survival probabilities.
#"cumhaz" for cumulative hazards.
#"hazard" for hazards.
#"rmst" for restricted mean survival.
#"mean" for mean survival.
#"median" for median survival (alternative to type="quantile" with quantiles=0.5).
#"quantile" for quantiles of the survival time distribution.
#"link" for the fitted value of the location parameter (i.e. the "linear predictor")
#dataframe
fitted_flexsurvreg2b_4s<-rbind.data.frame(fitted_flexsurvreg2b_4s,
cbind.data.frame(dist= rep(dists_w_covs_4s[x,"formal"],),
hr= round(exp(coef(get(mod_flexsurv2)[[y]])["factor(tipo_de_programa_2)1"]),3),
trans=rep(y,),
est_by_time2))
#
fit_flexsurvreg2b_4s<-rbind(fit_flexsurvreg2b_4s,
cbind(dist= dists_w_covs_4s[x,"formal"],
trans=y,
covs="w/ covs",
AIC= get(mod_flexsurv2)[[y]]$AIC,
Llik= get(mod_flexsurv2)[[y]]$loglik,
npars= get(mod_flexsurv2)[[y]]$npars,
pars= get(mod_flexsurv2)[[y]]$AIC/2 + get(mod_flexsurv2)[[y]]$loglik,
BIC= get(mod_flexsurv2)[[y]]$loglik+ log(get(mod_flexsurv2)[[y]]$N)* (get(mod_flexsurv2)[[y]]$AIC/2 + get(mod_flexsurv2)[[y]]$loglik)
#the BIC may not be appropriate if none of the candidate models are considered to be close to the ‘true’ model.
)
)
}
}
## #### Flexible Survival Model (w/ covs): Weibull (AFT); transition: 1
##
## #### Flexible Survival Model (w/ covs): Weibull (PH); transition: 1
##
## #### Flexible Survival Model (w/ covs): Gompertz; transition: 1
##
## #### Flexible Survival Model (w/ covs): Log-logistic; transition: 1
##
## #### Flexible Survival Model (w/ covs): Gamma; transition: 1
##
## #### Flexible Survival Model (w/ covs): Generalized gamma; transition: 1
##
## #### Flexible Survival Model (w/ covs): Lognormal; transition: 1
##
## #### Flexible Survival Model (w/ covs): Exponential; transition: 1
##
## #### Flexible Survival Model (w/ covs): Generalized F; transition: 1
##
## #### Flexible Survival Model (w/ covs): Weibull (AFT); transition: 2
##
## #### Flexible Survival Model (w/ covs): Weibull (PH); transition: 2
##
## #### Flexible Survival Model (w/ covs): Gompertz; transition: 2
##
## #### Flexible Survival Model (w/ covs): Log-logistic; transition: 2
##
## #### Flexible Survival Model (w/ covs): Gamma; transition: 2
##
## #### Flexible Survival Model (w/ covs): Generalized gamma; transition: 2
##
## #### Flexible Survival Model (w/ covs): Lognormal; transition: 2
##
## #### Flexible Survival Model (w/ covs): Exponential; transition: 2
##
## #### Flexible Survival Model (w/ covs): Generalized F; transition: 2
##
## #### Flexible Survival Model (w/ covs): Weibull (AFT); transition: 3
##
## #### Flexible Survival Model (w/ covs): Weibull (PH); transition: 3
##
## #### Flexible Survival Model (w/ covs): Gompertz; transition: 3
##
## #### Flexible Survival Model (w/ covs): Log-logistic; transition: 3
##
## #### Flexible Survival Model (w/ covs): Gamma; transition: 3
##
## #### Flexible Survival Model (w/ covs): Generalized gamma; transition: 3
##
## #### Flexible Survival Model (w/ covs): Lognormal; transition: 3
##
## #### Flexible Survival Model (w/ covs): Exponential; transition: 3
##
## #### Flexible Survival Model (w/ covs): Generalized F; transition: 3
##
## #### Flexible Survival Model (w/ covs): Weibull (AFT); transition: 4
##
## #### Flexible Survival Model (w/ covs): Weibull (PH); transition: 4
##
## #### Flexible Survival Model (w/ covs): Gompertz; transition: 4
##
## #### Flexible Survival Model (w/ covs): Log-logistic; transition: 4
##
## #### Flexible Survival Model (w/ covs): Gamma; transition: 4
##
## #### Flexible Survival Model (w/ covs): Generalized gamma; transition: 4
##
## #### Flexible Survival Model (w/ covs): Lognormal; transition: 4
##
## #### Flexible Survival Model (w/ covs): Exponential; transition: 4
##
## #### Flexible Survival Model (w/ covs): Generalized F; transition: 4
##
## #### Flexible Survival Model (w/ covs): Weibull (AFT); transition: 5
##
## #### Flexible Survival Model (w/ covs): Weibull (PH); transition: 5
##
## #### Flexible Survival Model (w/ covs): Gompertz; transition: 5
##
## #### Flexible Survival Model (w/ covs): Log-logistic; transition: 5
##
## #### Flexible Survival Model (w/ covs): Gamma; transition: 5
##
## #### Flexible Survival Model (w/ covs): Generalized gamma; transition: 5
##
## #### Flexible Survival Model (w/ covs): Lognormal; transition: 5
##
## #### Flexible Survival Model (w/ covs): Exponential; transition: 5
##
## #### Flexible Survival Model (w/ covs): Generalized F; transition: 5
##
#### Flexible Survival Model (w/ covs): Exponential; transition: 2
tipo_programa_label<- c(`0`="General Population",`1`="Women specific")
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
if(no_mostrar==1){
fitted_flexsurvreg2b_4s %>%
dplyr::group_by(trans) %>%
summarise(mean(ucl,na.rm=T))
}
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#Calculate error
haz_int_only2_binned<-
haz_int_only2_4s %>%
dplyr::mutate(time=ifelse(time<.5,.5,time)) %>%
dplyr::group_by(trans, tipo_programa) %>%
dplyr::mutate(x_binned = cut(time,
breaks= seq(0,max(haz_int_only2_4s$time),.5))) %>%
dplyr::ungroup() %>%
dplyr::group_by(trans, tipo_programa, dist, x_binned) %>%
dplyr::summarise(mean_time=mean(time,na.rm=T),mean_est=mean(est,na.rm=T)) %>%
dplyr::ungroup() %>%
dplyr::rename("tipo_de_programa_2"="tipo_programa") %>%
dplyr::mutate(tipo_de_programa_2=factor(tipo_de_programa_2))%>%
dplyr::select(-dist)
## `summarise()` regrouping output by 'trans', 'tipo_programa', 'dist' (override with `.groups` argument)
fitted_flexsurvreg2b_4s_binned<-
fitted_flexsurvreg2b_4s[,c("dist","trans","time","est","factor(tipo_de_programa_2)")] %>%
dplyr::rename("tipo_de_programa_2"="factor(tipo_de_programa_2)") %>%
dplyr::filter(time<=max(haz_int_only2_4s$time)) %>%
dplyr::mutate(time=ifelse(time<.5,.5,time)) %>%
dplyr::group_by(trans, dist, tipo_de_programa_2) %>%
dplyr::mutate(x_binned = cut(time,
breaks= seq(0,max(haz_int_only2_4s$time),.5))) %>%
dplyr::ungroup() %>%
dplyr::group_by(trans, dist, tipo_de_programa_2, x_binned) %>%
dplyr::summarise(mean_time=mean(time,na.rm=T),mean_est=mean(est,na.rm=T)) %>%
dplyr::ungroup()
## `summarise()` regrouping output by 'trans', 'dist', 'tipo_de_programa_2' (override with `.groups` argument)
fitted_flexsurvreg2b_4s_binned_mix<-
fitted_flexsurvreg2b_4s_binned %>%
dplyr::left_join(haz_int_only2_binned, by=c("trans","tipo_de_programa_2", "x_binned")) %>%
dplyr::select(-mean_time.y,-mean_time.x) %>%
dplyr::rename("mean_est_flexsurv"="mean_est.x","mean_est_cumhaz"="mean_est.y")
db_for_apply_rmse_d<-
cbind.data.frame(tipo_prog=rep(c("0","1"),each=9,5),
dist=rep(c("Weibull (AFT)", "Weibull (PH)", "Gompertz", "Log-logistic", "Gamma",
"Generalized gamma", "Lognormal", "Exponential", "Generalized F"),2*5),
trans=rep(c(1:5),each=9*2))
rmse_comp_fits_2b_4s<- data.frame()
for(i in 1:nrow(db_for_apply_rmse_d)){
rmse<- Metrics::rmse(subset(fitted_flexsurvreg2b_4s_binned_mix[complete.cases(fitted_flexsurvreg2b_4s_binned_mix),],
tipo_de_programa_2==db_for_apply_rmse_d[i,"tipo_prog"] &
dist==db_for_apply_rmse_d[i,"dist"] &
trans==db_for_apply_rmse_d[i,"trans"])$mean_est_flexsurv,
subset(fitted_flexsurvreg2b_4s_binned_mix[complete.cases(fitted_flexsurvreg2b_4s_binned_mix),],
tipo_de_programa_2==db_for_apply_rmse_d[i,"tipo_prog"] &
dist==db_for_apply_rmse_d[i,"dist"] &
trans==db_for_apply_rmse_d[i,"trans"])$mean_est_cumhaz)
rmse_comp_fits_2b_4s<- rbind(rmse_comp_fits_2b_4s,cbind(dist=db_for_apply_rmse_d[i,"dist"],
tipo_prog=db_for_apply_rmse_d[i,"tipo_prog"],
trans=db_for_apply_rmse_d[i,"trans"],
rmse=rmse))
}
rmse_comp_fits_2b_4s<-
rmse_comp_fits_2b_4s %>%
tidyr::pivot_wider(names_from = tipo_prog, values_from = rmse) %>%
dplyr::rename("gp"="0","we"="1") %>%
dplyr::mutate(gp=as.numeric(gp),we=as.numeric(we)) %>%
dplyr::mutate(mean_rmse=rowSums(.[3:4])/2) %>%
dplyr::arrange(trans,dist, mean_rmse)%>%
dplyr::mutate(mean_rmse=round(mean_rmse,4))
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#http://colorschemedesigner.com/csd-3.5/#
#https://www.r-graph-gallery.com/42-colors-names.html
plot_fitted_flexsurvreg2b_4s<-
fitted_flexsurvreg2b_4s[,c("dist","trans","time","est","lcl","ucl","factor(tipo_de_programa_2)")] %>%
rbind(cbind(dplyr::rename(haz_int_only2_4s, "factor(tipo_de_programa_2)"="tipo_programa"),
"lcl"=haz_int_only2_4s$est,"ucl"=haz_int_only2_4s$est)) %>%
dplyr::rename("tipo_de_programa_2"="factor(tipo_de_programa_2)") %>%
dplyr::mutate(tipo_de_programa_2=factor(tipo_de_programa_2),
dist=factor(dist, levels=c("Kernel density",dists_w_covs$formal)),
trans=factor(trans)) %>%
#dplyr::group_by(tipo_de_programa_2,dist,time,trans) %>% summarise(n=n()) %>%
ggplot()+
geom_line(aes(time, est, color=dist),size=1)+
geom_ribbon(aes(x = time, ymin = lcl, ymax = ucl, fill = dist), alpha = 0.2)+
#geom_ribbon(aes(ymin=lcl, ymax=ucl), alpha=.5, linetype=0) +
scale_color_manual(name="Distributions", values = c("red","gray20","darkolivegreen","#D3A347","#4F3C91","coral4","#085754","lightpink","turquoise","greenyellow")) +
scale_fill_manual(name="Distributions", values = c("red","gray20","darkolivegreen","#D3A347","#4F3C91","coral4","#085754","lightpink","turquoise","greenyellow")) +
facet_wrap(tipo_de_programa_2~trans,labeller = labeller(trans = transition_label_4s, tipo_de_programa_2=tipo_programa_label),ncol=2, dir="v", scales="free_y")+
sjPlot::theme_sjplot2()+
#ylim(0,.3)+
scale_x_continuous(breaks = seq(0, 15, 2))+
theme(legend.position="bottom",
strip.background = element_rect(fill = "white", colour = "white"))+
labs(y="Hazard",x="Time (years)",caption="Note. Kernel density, stratified by type of program")
plot_fitted_flexsurvreg2b_4s
## http://www.statistica.it/gianluca/teaching/r-hta-workshop/2019/Bullement.pdf
tiempo_despues_fits22<-Sys.time()
paste0("Time in process: ");tiempo_despues_fits22-tiempo_antes_fits22
## [1] "Time in process: "
## Time difference of 3.393575 mins
#13 minutos aprox. en DELL
options(warn=0)
if(no_mostrar==1){
jpeg("C:/Users/andre/Desktop/SUD_CL/eso13.jpg", height=17, width= 10, res= 96, units = "in")
plot_fitted_flexsurvreg2b_4s
dev.off()
}
Due to convergence issues, we defined the third transition for the Generalized gamma distribution using initial values of 0.0001 and 1.056. There was very little to choose between the distributions, but we aimed to choose the distribution that achieve the best balance of a reasonable fit to the observed data and a sensible extrapolation. A Generalized gamma was chosen for the first transition (Mean RMSE= 0.1481; AIC= 22,974.43). The distribution for the second transition was Gompertz (Mean RMSE= 0.0781; AIC= 30,611.54). For the third transition, we decided to choose a Lognormal, considering that Generalized Gamma distributions required to specify starting values (Mean RMSE= 0.02; AIC= 6,174.837). For the fourth transition, we decided to choose a Log-logistic because it shows a lower AIC (Mean RMSE= 0.0351; AIC= 7,089.797). For the fifth transition, we decided to choose a Lognormal (Mean RMSE= 0.0436; AIC= 22,370.96)
We decided to not to include by now the Generalized F yet because we are still studying its behavior. However, it shoed to be a good candidate for transitions 1 and 2, as welll as transition 4 and 5 (but not the better for these two).